
Example 26 with ParseException

use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

the class IncrementalIndex method add.

/**
 * Adds a new row.  The row might correspond with another row that already exists, in which case this will
 * update that row instead of inserting a new one.
 * <p>
 * Calls to add() are thread safe.
 *
 * @param row                      the row of data to add
 * @param skipMaxRowsInMemoryCheck whether or not to skip the check of rows exceeding the max rows limit
 * @return the number of rows in the data set after adding the InputRow. If any parse failure occurs, a {@link ParseException} is returned in {@link IncrementalIndexAddResult}.
 * @throws IndexSizeExceededException thrown once the index reaches the max rows limit and skipMaxRowsInMemoryCheck is set to false.
 */
public IncrementalIndexAddResult add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException {
    IncrementalIndexRowResult incrementalIndexRowResult = toIncrementalIndexRow(row);
    final AddToFactsResult addToFactsResult = addToFacts(row, incrementalIndexRowResult.getIncrementalIndexRow(), in, rowSupplier, skipMaxRowsInMemoryCheck);
    updateMaxIngestedTime(row.getTimestamp());
    @Nullable ParseException parseException = getCombinedParseException(row, incrementalIndexRowResult.getParseExceptionMessages(), addToFactsResult.getParseExceptionMessages());
    return new IncrementalIndexAddResult(addToFactsResult.getRowCount(), addToFactsResult.getBytesInMemory(), parseException);
}
Also used : UnparseableColumnsParseException(org.apache.druid.java.util.common.parsers.UnparseableColumnsParseException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Nullable(javax.annotation.Nullable)
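
For context, a minimal caller-side sketch (not taken from the Druid source): the point of the method above is that parse failures are surfaced through the returned IncrementalIndexAddResult rather than thrown, so callers check the result. The index instance, the example row values, and the epoch-millis MapBasedInputRow constructor used here are assumptions for illustration; the accessors hasParseException() and getParseException() appear in Example 30.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.util.Map;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexAddResult;
import org.apache.druid.segment.incremental.IndexSizeExceededException;

class AddResultSketch {
    // Feed one row into an already-built IncrementalIndex and inspect the combined
    // ParseException that the add result carries instead of throwing.
    static void addOneRow(IncrementalIndex index) throws IndexSizeExceededException {
        // deliberately unparseable metric value, to provoke a ParseException message
        Map<String, Object> event = ImmutableMap.of("dim1", "a", "metric1", "not-a-number");
        MapBasedInputRow row = new MapBasedInputRow(
            System.currentTimeMillis(),    // assumed epoch-millis constructor; Example 29 uses the DateTime form
            ImmutableList.of("dim1"),
            event);
        IncrementalIndexAddResult result = index.add(row, true);    // true = skipMaxRowsInMemoryCheck
        if (result.hasParseException()) {
            // the parse failure is reported through the result, not thrown
            System.out.println(result.getParseException().getMessage());
        }
    }
}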

Example 27 with ParseException

use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

the class OnheapIncrementalIndex method doAggregate.

/**
 * Performs aggregation for all of the aggregators.
 *
 * @return Total incremental memory in bytes required by this step of the
 * aggregation. The returned value is non-zero only if
 * {@link #useMaxMemoryEstimates} is false.
 */
private long doAggregate(AggregatorFactory[] metrics, Aggregator[] aggs, ThreadLocal<InputRow> rowContainer, InputRow row, List<String> parseExceptionsHolder) {
    rowContainer.set(row);
    long totalIncrementalBytes = 0L;
    for (int i = 0; i < aggs.length; i++) {
        final Aggregator agg = aggs[i];
        synchronized (agg) {
            try {
                if (useMaxMemoryEstimates) {
                    agg.aggregate();
                } else {
                    totalIncrementalBytes += agg.aggregateWithSize();
                }
            } catch (ParseException e) {
                // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName());
                parseExceptionsHolder.add(e.getMessage());
            }
        }
    }
    rowContainer.set(null);
    return totalIncrementalBytes;
}
Also used : PostAggregator(org.apache.druid.query.aggregation.PostAggregator) Aggregator(org.apache.druid.query.aggregation.Aggregator) ParseException(org.apache.druid.java.util.common.parsers.ParseException)
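
The ParseException caught above usually originates inside a column selector when a value cannot be coerced to the type the aggregator expects. The helper below is a hypothetical illustration (not Druid code) of how such an exception is typically raised; it reuses the ParseException(String input, String formatText, Object... args) constructor form visible in Example 29, and doAggregate() above is what catches it, logs it, and records the message in parseExceptionsHolder.

import org.apache.druid.java.util.common.parsers.ParseException;

class SelectorCoercionSketch {
    // Hypothetical coercion helper: turn a raw event value into a double the way a
    // numeric selector would, surfacing non-numeric input as a ParseException.
    static double coerceToDouble(Object value) {
        if (value == null) {
            return 0.0d;
        }
        if (value instanceof Number) {
            return ((Number) value).doubleValue();
        }
        try {
            return Double.parseDouble(value.toString());
        } catch (NumberFormatException e) {
            // constructor form as used in Example 29: (input, formatText, args...)
            throw new ParseException(value.toString(), "Unable to parse value[%s] as double", value);
        }
    }
}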
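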

Example 28 with ParseException

use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

the class Plumbers method addNextRow.

public static void addNextRow(final Supplier<Committer> committerSupplier, final Firehose firehose, final Plumber plumber, final boolean reportParseExceptions, final FireDepartmentMetrics metrics) throws IOException {
    final InputRow inputRow;
    try {
        inputRow = firehose.nextRow();
    } catch (ParseException e) {
        if (reportParseExceptions) {
            throw e;
        } else {
            log.debug(e, "Discarded row due to exception, considering unparseable.");
            metrics.incrementUnparseable();
            return;
        }
    }
    if (inputRow == null) {
        log.debug("Discarded null row, considering thrownAway.");
        metrics.incrementThrownAway();
        return;
    }
    final IncrementalIndexAddResult addResult;
    try {
        addResult = plumber.add(inputRow, committerSupplier);
    } catch (IndexSizeExceededException e) {
        // plumber.add should be swapping out indexes before they fill up.
        throw new ISE(e, "Index size exceeded");
    }
    if (addResult.getRowCount() == -1) {
        metrics.incrementThrownAway();
        log.debug("Discarded row[%s], considering thrownAway due to %s.", inputRow, addResult.getReasonOfNotAdded());
        return;
    }
    if (addResult.getRowCount() == -2) {
        metrics.incrementDedup();
        log.debug("Discarded row[%s], considering duplication.", inputRow);
        return;
    }
    metrics.incrementProcessed();
}
Also used : IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) InputRow(org.apache.druid.data.input.InputRow) ISE(org.apache.druid.java.util.common.ISE) ParseException(org.apache.druid.java.util.common.parsers.ParseException) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
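
A minimal driver-loop sketch of how addNextRow() might be used (illustration only; import statements are omitted and the parameter types are the same ones in the signature above, with Firehose.hasMore() assumed as the exhaustion check). With reportParseExceptions set to false, each ParseException from the firehose is swallowed and counted as unparseable instead of failing ingestion.

// Pull rows from a Firehose until it is exhausted, letting addNextRow() classify each
// row as processed, unparseable, thrownAway, or dedup in the supplied metrics.
static void drainFirehose(final Supplier<Committer> committerSupplier, final Firehose firehose, final Plumber plumber, final FireDepartmentMetrics metrics) throws IOException {
    // false: count parse failures as unparseable instead of rethrowing them
    final boolean reportParseExceptions = false;
    while (firehose.hasMore()) {
        Plumbers.addNextRow(committerSupplier, firehose, plumber, reportParseExceptions, metrics);
    }
}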

Example 29 with ParseException

use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

the class KafkaInputReader method buildBlendedRows.

private CloseableIterator<InputRow> buildBlendedRows(InputEntityReader valueParser, Map<String, Object> headerKeyList) throws IOException {
    return valueParser.read().map(r -> {
        MapBasedInputRow valueRow;
        try {
            // Return type for the value parser should be of type MapBasedInputRow
            // Parsers returning other types are not compatible currently.
            valueRow = (MapBasedInputRow) r;
        } catch (ClassCastException e) {
            throw new ParseException(null, "Unsupported input format in valueFormat. KafkaInputFormat only supports input format that return MapBasedInputRow rows");
        }
        Map<String, Object> event = new HashMap<>(headerKeyList);
        /*
         * Currently we prefer payload attributes if there is a collision in names.
         * We can change this behavior later with a config knob. This default
         * behavior allows easy porting of existing inputFormats to the new one without any changes.
         */
        event.putAll(valueRow.getEvent());
        HashSet<String> newDimensions = new HashSet<String>(valueRow.getDimensions());
        newDimensions.addAll(headerKeyList.keySet());
        // Remove the dummy timestamp added in KafkaInputFormat
        newDimensions.remove(KafkaInputFormat.DEFAULT_AUTO_TIMESTAMP_STRING);
        return new MapBasedInputRow(inputRowSchema.getTimestampSpec().extractTimestamp(event), getFinalDimensionList(newDimensions), event);
    });
}
Also used : HashMap(java.util.HashMap) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) ParseException(org.apache.druid.java.util.common.parsers.ParseException) HashSet(java.util.HashSet)
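
As the comment in the method notes, payload attributes take precedence over header-derived attributes when names collide, purely because of the putAll() order. A tiny standalone sketch (plain Java, made-up key names) makes the rule concrete:

import java.util.HashMap;
import java.util.Map;

class HeaderPayloadMergeSketch {
    public static void main(String[] args) {
        // Header-derived attributes seed the event first...
        Map<String, Object> headerKeyList = new HashMap<>();
        headerKeyList.put("kafka.topic", "metrics");
        headerKeyList.put("source", "header-value");

        // ...then the payload is applied with putAll(), so it wins on a name collision.
        Map<String, Object> payload = new HashMap<>();
        payload.put("source", "payload-value");

        Map<String, Object> event = new HashMap<>(headerKeyList);
        event.putAll(payload);

        System.out.println(event.get("source"));       // payload-value
        System.out.println(event.get("kafka.topic"));  // metrics
    }
}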

Example 30 with ParseException

use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

the class InputSourceSampler method sample.

public SamplerResponse sample(
    final InputSource inputSource,
    // inputFormat can be null only if inputSource.needsFormat() = false or parser is specified.
    @Nullable final InputFormat inputFormat,
    @Nullable final DataSchema dataSchema,
    @Nullable final SamplerConfig samplerConfig) {
    Preconditions.checkNotNull(inputSource, "inputSource required");
    if (inputSource.needsFormat()) {
        Preconditions.checkNotNull(inputFormat, "inputFormat required");
    }
    final DataSchema nonNullDataSchema = dataSchema == null ? DEFAULT_DATA_SCHEMA : dataSchema;
    final SamplerConfig nonNullSamplerConfig = samplerConfig == null ? SamplerConfig.empty() : samplerConfig;
    final Closer closer = Closer.create();
    final File tempDir = FileUtils.createTempDir();
    closer.register(() -> FileUtils.deleteDirectory(tempDir));
    try {
        final InputSourceReader reader = buildReader(nonNullSamplerConfig, nonNullDataSchema, inputSource, inputFormat, tempDir);
        try (final CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample();
            final IncrementalIndex index = buildIncrementalIndex(nonNullSamplerConfig, nonNullDataSchema);
            final Closer closer1 = closer) {
            List<SamplerResponseRow> responseRows = new ArrayList<>(nonNullSamplerConfig.getNumRows());
            int numRowsIndexed = 0;
            while (responseRows.size() < nonNullSamplerConfig.getNumRows() && iterator.hasNext()) {
                final InputRowListPlusRawValues inputRowListPlusRawValues = iterator.next();
                final List<Map<String, Object>> rawColumnsList = inputRowListPlusRawValues.getRawValuesList();
                final ParseException parseException = inputRowListPlusRawValues.getParseException();
                if (parseException != null) {
                    if (rawColumnsList != null) {
                        // add all rows to response
                        responseRows.addAll(rawColumnsList.stream().map(rawColumns -> new SamplerResponseRow(rawColumns, null, true, parseException.getMessage())).collect(Collectors.toList()));
                    } else {
                        // no data parsed, add one response row
                        responseRows.add(new SamplerResponseRow(null, null, true, parseException.getMessage()));
                    }
                    continue;
                }
                List<InputRow> inputRows = inputRowListPlusRawValues.getInputRows();
                if (inputRows == null) {
                    continue;
                }
                for (int i = 0; i < inputRows.size(); i++) {
                    // InputRowListPlusRawValues guarantees the size of rawColumnsList and inputRows are the same
                    Map<String, Object> rawColumns = rawColumnsList == null ? null : rawColumnsList.get(i);
                    InputRow row = inputRows.get(i);
                    // keep the index of the row to be added to responseRows for further use
                    final int rowIndex = responseRows.size();
                    IncrementalIndexAddResult addResult = index.add(new SamplerInputRow(row, rowIndex), true);
                    if (addResult.hasParseException()) {
                        responseRows.add(new SamplerResponseRow(rawColumns, null, true, addResult.getParseException().getMessage()));
                    } else {
                        // store the raw value; will be merged with the data from the IncrementalIndex later
                        responseRows.add(new SamplerResponseRow(rawColumns, null, null, null));
                        numRowsIndexed++;
                    }
                }
            }
            final List<String> columnNames = index.getColumnNames();
            columnNames.remove(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
            for (Row row : index) {
                Map<String, Object> parsed = new LinkedHashMap<>();
                parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch());
                columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));
                Number sortKey = row.getMetric(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
                if (sortKey != null) {
                    responseRows.set(sortKey.intValue(), responseRows.get(sortKey.intValue()).withParsed(parsed));
                }
            }
            // make sure size of responseRows meets the input
            if (responseRows.size() > nonNullSamplerConfig.getNumRows()) {
                responseRows = responseRows.subList(0, nonNullSamplerConfig.getNumRows());
            }
            int numRowsRead = responseRows.size();
            return new SamplerResponse(numRowsRead, numRowsIndexed, responseRows.stream().filter(Objects::nonNull).filter(x -> x.getParsed() != null || x.isUnparseable() != null).collect(Collectors.toList()));
        }
    } catch (Exception e) {
        throw new SamplerException(e, "Failed to sample data: %s", e.getMessage());
    }
}
Also used : ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Closer(org.apache.druid.java.util.common.io.Closer) InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) ParseException(org.apache.druid.java.util.common.parsers.ParseException) DataSchema(org.apache.druid.segment.indexing.DataSchema) InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimedShutoffInputSourceReader(org.apache.druid.data.input.impl.TimedShutoffInputSourceReader) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) InputRow(org.apache.druid.data.input.InputRow) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) Row(org.apache.druid.data.input.Row) File(java.io.File) Map(java.util.Map)
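
The least obvious part of sample() is the bookkeeping around SamplerInputRow: each row is added to the index together with its position in responseRows, and that position comes back out as the SAMPLER_ORDERING_COLUMN metric so the parsed result can be written into the right slot. A standalone sketch of that mechanism (plain Java, toy data, no Druid types):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

class SortKeySketch {
    public static void main(String[] args) {
        // responseRows is filled in arrival order, one slot per sampled row.
        List<String> responseRows = new ArrayList<>(Arrays.asList("raw-a", "raw-b", "raw-c"));

        // Pretend the incremental index hands rows back in a different order,
        // each carrying the slot it was assigned when it was added.
        Map<Integer, String> indexed = new LinkedHashMap<>();
        indexed.put(2, "parsed-c");
        indexed.put(0, "parsed-a");
        indexed.put(1, "parsed-b");

        for (Map.Entry<Integer, String> entry : indexed.entrySet()) {
            int sortKey = entry.getKey();                 // plays the role of SAMPLER_ORDERING_COLUMN
            responseRows.set(sortKey, entry.getValue());  // plays the role of withParsed(parsed)
        }

        System.out.println(responseRows);  // [parsed-a, parsed-b, parsed-c] -- original arrival order
    }
}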

Aggregations

ParseException (org.apache.druid.java.util.common.parsers.ParseException) 30
IOException (java.io.IOException) 9
InputRow (org.apache.druid.data.input.InputRow) 8
Map (java.util.Map) 6
Test (org.junit.Test) 6
ArrayList (java.util.ArrayList) 4
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow) 4
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 3
List (java.util.List) 3
Nullable (javax.annotation.Nullable) 3
Schema (org.apache.avro.Schema) 3
GenericDatumReader (org.apache.avro.generic.GenericDatumReader) 3
GenericRecord (org.apache.avro.generic.GenericRecord) 3
ISE (org.apache.druid.java.util.common.ISE) 3
JsonCreator (com.fasterxml.jackson.annotation.JsonCreator) 2
JsonProperty (com.fasterxml.jackson.annotation.JsonProperty) 2
JsonNode (com.fasterxml.jackson.databind.JsonNode) 2
Descriptors (com.google.protobuf.Descriptors) 2
DynamicMessage (com.google.protobuf.DynamicMessage) 2
EOFException (java.io.EOFException) 2