Search in sources :

Example 26 with ParseException

Use of ParseException in project druid by druid-io.

the class IncrementalIndex method add.

/**
 * Adds a new row.  The row might correspond with another row that already exists, in which case this will
 * update that row instead of inserting a new one.
 * <p>
 * Calls to add() are thread safe.
 * <p>
 * @param row                      the row of data to add
 * @param skipMaxRowsInMemoryCheck whether or not to skip the check of rows exceeding the max rows limit
 * @return an {@link IncrementalIndexAddResult} holding the number of rows in the data set after adding the InputRow. If any parse failure occurs, a {@link ParseException} is returned in that {@link IncrementalIndexAddResult}.
 * @throws IndexSizeExceededException this exception is thrown once it reaches max rows limit and skipMaxRowsInMemoryCheck is set to false.
 */
public IncrementalIndexAddResult add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException {
    // Convert the input row into its index-row form; the result also carries any parse-exception messages from that step.
    IncrementalIndexRowResult incrementalIndexRowResult = toIncrementalIndexRow(row);
    // NOTE(review): `in` and `rowSupplier` are not declared in this excerpt - presumably fields of the enclosing class; confirm.
    final AddToFactsResult addToFactsResult = addToFacts(row, incrementalIndexRowResult.getIncrementalIndexRow(), in, rowSupplier, skipMaxRowsInMemoryCheck);
    // Fold the messages from both steps into one exception; declared @Nullable, so presumably null when nothing failed - confirm.
    @Nullable ParseException parseException = getCombinedParseException(row, incrementalIndexRowResult.getParseExceptionMessages(), addToFactsResult.getParseExceptionMessages());
    // The result reports the row count and bytes in memory from the facts step, plus the combined parse exception.
    return new IncrementalIndexAddResult(addToFactsResult.getRowCount(), addToFactsResult.getBytesInMemory(), parseException);
    // NOTE(review): the method's closing brace is missing from this excerpt.
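Also used : UnparseableColumnsParseException(org.apache.druid.java.util.common.parsers.UnparseableColumnsParseException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Nullable(javax.annotation.Nullable)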

Example 27 with ParseException

Use of ParseException in project druid by druid-io.

the class OnheapIncrementalIndex method doAggregate.

/**
 * Performs aggregation for all of the aggregators.
 *
 * @return Total incremental memory in bytes required by this step of the
 * aggregation. The returned value is non-zero only if
 * {@link #useMaxMemoryEstimates} is false.
 */
private long doAggregate(AggregatorFactory[] metrics, Aggregator[] aggs, ThreadLocal<InputRow> rowContainer, InputRow row, List<String> parseExceptionsHolder) {
    // NOTE(review): rowContainer, row and parseExceptionsHolder are never used in this excerpt - presumably the lines using them were lost; confirm.
    // Running sum of the sizes reported by aggregateWithSize().
    long totalIncrementalBytes = 0L;
    for (int i = 0; i < aggs.length; i++) {
        final Aggregator agg = aggs[i];
        // Each aggregator is locked individually while it is invoked.
        synchronized (agg) {
            try {
                // NOTE(review): this branch is empty in the excerpt; presumably a plain aggregate call was lost - confirm against the original source.
                if (useMaxMemoryEstimates) {
                } else {
                    // Only this path contributes to the returned byte count.
                    totalIncrementalBytes += agg.aggregateWithSize();
            } catch (ParseException e) {
                // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                // metrics[i] is used to name the failing aggregator, so metrics is indexed in step with aggs.
                log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName());
    // NOTE(review): closing braces for the try/synchronized/for blocks and the method are missing from this excerpt.
    return totalIncrementalBytes;
Also used : PostAggregator(org.apache.druid.query.aggregation.PostAggregator) Aggregator(org.apache.druid.query.aggregation.Aggregator) ParseException(org.apache.druid.java.util.common.parsers.ParseException)

Example 28 with ParseException

Use of ParseException in project druid by druid-io.

the class Plumbers method addNextRow.

public static void addNextRow(final Supplier<Committer> committerSupplier, final Firehose firehose, final Plumber plumber, final boolean reportParseExceptions, final FireDepartmentMetrics metrics) throws IOException {
    // NOTE(review): `metrics` is never used in this excerpt - presumably the counter updates were lost along with the early returns; confirm.
    final InputRow inputRow;
    try {
        // Pull the next row from the firehose.
        inputRow = firehose.nextRow();
    } catch (ParseException e) {
        // A parse failure is rethrown only when the caller asked for it; otherwise it is logged at debug level.
        if (reportParseExceptions) {
            throw e;
        } else {
            log.debug(e, "Discarded row due to exception, considering unparseable.");
            // NOTE(review): inputRow is final and unassigned on this path, so the original presumably returns here - lines appear to be missing.
    if (inputRow == null) {
        log.debug("Discarded null row, considering thrownAway.");
        // NOTE(review): the rest of this branch and its closing brace are missing from this excerpt.
    final IncrementalIndexAddResult addResult;
    try {
        // Hand the row to the plumber together with the committer supplier.
        addResult = plumber.add(inputRow, committerSupplier);
    } catch (IndexSizeExceededException e) {
        // plumber.add should be swapping out indexes before they fill up.
        throw new ISE(e, "Index size exceeded");
    // A row count of -1 is logged as a thrown-away row, with the reason taken from the add result.
    if (addResult.getRowCount() == -1) {
        log.debug("Discarded row[%s], considering thrownAway due to %s.", inputRow, addResult.getReasonOfNotAdded());
    // A row count of -2 is logged as a duplicate row.
    if (addResult.getRowCount() == -2) {
        log.debug("Discarded row[%s], considering duplication.", inputRow);
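Also used : IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) InputRow(org.apache.druid.data.input.InputRow) ISE(org.apache.druid.java.util.common.ISE) ParseException(org.apache.druid.java.util.common.parsers.ParseException) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)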

Example 29 with ParseException

Use of ParseException in project druid by druid-io.

the class KafkaInputReader method buildBlendedRows.

private CloseableIterator<InputRow> buildBlendedRows(InputEntityReader valueParser, Map<String, Object> headerKeyList) throws IOException {
    // NOTE(review): the expression before "->" is missing from this excerpt; `r` below is presumably the lambda parameter - confirm.
    return -> {
        MapBasedInputRow valueRow;
        try {
            // Return type for the value parser should be of type MapBasedInputRow
            // Parsers returning other types are not compatible currently.
            valueRow = (MapBasedInputRow) r;
        } catch (ClassCastException e) {
            // A failed cast is reported as a ParseException with an explanatory message.
            throw new ParseException(null, "Unsupported input format in valueFormat. KafkaInputFormat only supports input format that return MapBasedInputRow rows");
        // Start the merged event from a copy of the header/key entries.
        Map<String, Object> event = new HashMap<>(headerKeyList);
        // NOTE(review): the block comment below is never closed in this excerpt, and the statements that merge the
        // payload into `event` and drop the dummy timestamp are not visible - confirm against the original source.
        /* Currently we prefer payload attributes if there is a collision in names.
              We can change this beahvior in later changes with a config knob. This default
              behavior lets easy porting of existing inputFormats to the new one without any changes.
        // Copy the value row's dimensions into a set.
        HashSet<String> newDimensions = new HashSet<String>(valueRow.getDimensions());
        // Remove the dummy timestamp added in KafkaInputFormat
        // The returned row takes its timestamp from `event` via the schema's timestamp spec.
        return new MapBasedInputRow(inputRowSchema.getTimestampSpec().extractTimestamp(event), getFinalDimensionList(newDimensions), event);
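Also used : HashMap(java.util.HashMap) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) ParseException(org.apache.druid.java.util.common.parsers.ParseException) HashSet(java.util.HashSet)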

Example 30 with ParseException

Use of ParseException in project druid by druid-io.

the class InputSourceSampler method sample.

public SamplerResponse sample(final InputSource inputSource, // inputFormat can be null only if inputSource.needsFormat() = false or parser is specified.
@Nullable final InputFormat inputFormat, @Nullable final DataSchema dataSchema, @Nullable final SamplerConfig samplerConfig) {
    // Validate required arguments; inputFormat is only mandatory when the source says it needs one.
    Preconditions.checkNotNull(inputSource, "inputSource required");
    if (inputSource.needsFormat()) {
        Preconditions.checkNotNull(inputFormat, "inputFormat required");
    // Fall back to defaults when no schema or sampler config is supplied.
    final DataSchema nonNullDataSchema = dataSchema == null ? DEFAULT_DATA_SCHEMA : dataSchema;
    final SamplerConfig nonNullSamplerConfig = samplerConfig == null ? SamplerConfig.empty() : samplerConfig;
    final Closer closer = Closer.create();
    final File tempDir = FileUtils.createTempDir();
    // Deleting the temp directory is registered on the closer, so it happens when the closer is closed.
    closer.register(() -> FileUtils.deleteDirectory(tempDir));
    try {
        final InputSourceReader reader = buildReader(nonNullSamplerConfig, nonNullDataSchema, inputSource, inputFormat, tempDir);
        // The closer is listed as a resource too, so its registered cleanup runs when this try block exits.
        try (final CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample();
            final IncrementalIndex index = buildIncrementalIndex(nonNullSamplerConfig, nonNullDataSchema);
            final Closer closer1 = closer) {
            List<SamplerResponseRow> responseRows = new ArrayList<>(nonNullSamplerConfig.getNumRows());
            // NOTE(review): numRowsIndexed is never incremented in this excerpt - presumably the increment was lost; confirm.
            int numRowsIndexed = 0;
            // Read until the configured number of response rows is reached or the input is exhausted.
            while (responseRows.size() < nonNullSamplerConfig.getNumRows() && iterator.hasNext()) {
                // NOTE(review): the right-hand side of this assignment is missing from the excerpt.
                final InputRowListPlusRawValues inputRowListPlusRawValues =;
                final List<Map<String, Object>> rawColumnsList = inputRowListPlusRawValues.getRawValuesList();
                final ParseException parseException = inputRowListPlusRawValues.getParseException();
                if (parseException != null) {
                    if (rawColumnsList != null) {
                        // add all rows to response
                        // NOTE(review): the stream source and the start of the lambda are missing from the next line.
                        responseRows.addAll( -> new SamplerResponseRow(rawColumns, null, true, parseException.getMessage())).collect(Collectors.toList()));
                    } else {
                        // no data parsed, add one response row
                        responseRows.add(new SamplerResponseRow(null, null, true, parseException.getMessage()));
                List<InputRow> inputRows = inputRowListPlusRawValues.getInputRows();
                // NOTE(review): the body of this null check is missing from the excerpt.
                if (inputRows == null) {
                for (int i = 0; i < inputRows.size(); i++) {
                    // InputRowListPlusRawValues guarantees the size of rawColumnsList and inputRows are the same
                    Map<String, Object> rawColumns = rawColumnsList == null ? null : rawColumnsList.get(i);
                    InputRow row = inputRows.get(i);
                    // keep the index of the row to be added to responseRows for further use
                    final int rowIndex = responseRows.size();
                    // The row is wrapped together with its response position; the max-rows check is skipped (second argument true).
                    IncrementalIndexAddResult addResult = index.add(new SamplerInputRow(row, rowIndex), true);
                    if (addResult.hasParseException()) {
                        // Indexing failed to parse the row: record it with the parse error message.
                        responseRows.add(new SamplerResponseRow(rawColumns, null, true, addResult.getParseException().getMessage()));
                    } else {
                        // store the raw value; will be merged with the data from the IncrementalIndex later
                        responseRows.add(new SamplerResponseRow(rawColumns, null, null, null));
            final List<String> columnNames = index.getColumnNames();
            // Walk the indexed rows and attach each parsed view to its matching response row.
            for (Row row : index) {
                // LinkedHashMap keeps insertion order: the time column first, then the index's columns.
                Map<String, Object> parsed = new LinkedHashMap<>();
                parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch());
                columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));
                // The ordering metric is used as the position in responseRows (presumably the rowIndex given to SamplerInputRow - confirm).
                Number sortKey = row.getMetric(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
                if (sortKey != null) {
                    responseRows.set(sortKey.intValue(), responseRows.get(sortKey.intValue()).withParsed(parsed));
            // make sure size of responseRows meets the input
            if (responseRows.size() > nonNullSamplerConfig.getNumRows()) {
                responseRows = responseRows.subList(0, nonNullSamplerConfig.getNumRows());
            int numRowsRead = responseRows.size();
            // NOTE(review): the stream source and the start of the filter lambda are missing from the next line.
            return new SamplerResponse(numRowsRead, numRowsIndexed, -> x.getParsed() != null || x.isUnparseable() != null).collect(Collectors.toList()));
    } catch (Exception e) {
        // Any failure is wrapped in a SamplerException that receives the original exception and its message.
        throw new SamplerException(e, "Failed to sample data: %s", e.getMessage());
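Also used : ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Closer(org.apache.druid.java.util.common.io.Closer) InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) ParseException(org.apache.druid.java.util.common.parsers.ParseException) DataSchema(org.apache.druid.segment.indexing.DataSchema) InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimedShutoffInputSourceReader(org.apache.druid.data.input.impl.TimedShutoffInputSourceReader) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) InputRow(org.apache.druid.data.input.InputRow) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Row(org.apache.druid.data.input.Row) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InputRow(org.apache.druid.data.input.InputRow) File(java.io.File) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)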


ParseException ( IOException ( InputRow ( Map (java.util.Map)6 Test (org.junit.Test)6 ArrayList (java.util.ArrayList)4 MapBasedInputRow ( VisibleForTesting ( List (java.util.List)3 Nullable (javax.annotation.Nullable)3 Schema (org.apache.avro.Schema)3 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)3 GenericRecord (org.apache.avro.generic.GenericRecord)3 ISE ( JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)2 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)2 JsonNode (com.fasterxml.jackson.databind.JsonNode)2 Descriptors ( DynamicMessage ( EOFException (