use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
the class IncrementalIndex method add.
/**
 * Adds a new row. The row might correspond with another row that already exists, in which case this will
 * update that row instead of inserting a new one.
 * <p>
 * Calls to add() are thread safe.
 *
 * @param row                      the row of data to add
 * @param skipMaxRowsInMemoryCheck whether to skip the check that rows do not exceed the max rows limit
 *
 * @return the number of rows in the data set after adding the InputRow. If any parse failure occurs, a
 *         {@link ParseException} is returned in the {@link IncrementalIndexAddResult} rather than thrown.
 *
 * @throws IndexSizeExceededException if the index reaches the max rows limit and skipMaxRowsInMemoryCheck
 *                                    is set to false
 */
public IncrementalIndexAddResult add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException
{
  IncrementalIndexRowResult incrementalIndexRowResult = toIncrementalIndexRow(row);
  final AddToFactsResult addToFactsResult =
      addToFacts(row, incrementalIndexRowResult.getIncrementalIndexRow(), in, rowSupplier, skipMaxRowsInMemoryCheck);
  updateMaxIngestedTime(row.getTimestamp());
  @Nullable ParseException parseException =
      getCombinedParseException(row, incrementalIndexRowResult.getParseExceptionMessages(), addToFactsResult.getParseExceptionMessages());
  return new IncrementalIndexAddResult(addToFactsResult.getRowCount(), addToFactsResult.getBytesInMemory(), parseException);
}
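For context, here is a minimal caller-side sketch of consuming this result; it is not part of the Druid source. The index, row, and log variables are assumed to already exist, and only accessors that appear elsewhere on this page (hasParseException, getParseException) are used.

// Hedged sketch: `index` is an IncrementalIndex, `row` an InputRow, `log` a logger; all assumed.
try {
  IncrementalIndexAddResult addResult = index.add(row, false);
  if (addResult.hasParseException()) {
    // Parse failures are reported via the result rather than thrown.
    log.warn("Parse failure while adding row: %s", addResult.getParseException().getMessage());
  }
} catch (IndexSizeExceededException e) {
  // Thrown because skipMaxRowsInMemoryCheck is false and the max rows limit was reached;
  // a caller would typically persist or hand off the in-memory index at this point.
}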
use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
the class OnheapIncrementalIndex method doAggregate.
/**
* Performs aggregation for all of the aggregators.
*
* @return Total incremental memory in bytes required by this step of the
* aggregation. The returned value is non-zero only if
* {@link #useMaxMemoryEstimates} is false.
*/
private long doAggregate(AggregatorFactory[] metrics, Aggregator[] aggs, ThreadLocal<InputRow> rowContainer, InputRow row, List<String> parseExceptionsHolder)
{
  rowContainer.set(row);
  long totalIncrementalBytes = 0L;
  for (int i = 0; i < aggs.length; i++) {
    final Aggregator agg = aggs[i];
    synchronized (agg) {
      try {
        if (useMaxMemoryEstimates) {
          agg.aggregate();
        } else {
          totalIncrementalBytes += agg.aggregateWithSize();
        }
      }
      catch (ParseException e) {
        // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
        log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName());
        parseExceptionsHolder.add(e.getMessage());
      }
    }
  }
  rowContainer.set(null);
  return totalIncrementalBytes;
}
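The messages collected in parseExceptionsHolder are later combined into a single ParseException (see getCombinedParseException in the IncrementalIndex.add example above). The following is a small, self-contained illustration of that collect-and-continue pattern; the input values and message text are made up, and the (input, formatText, args) ParseException constructor is used in the same way as in KafkaInputReader below.

// Hedged illustration of the collect-and-continue pattern used by doAggregate();
// the values and the message text are made up for the example.
List<String> parseExceptionMessages = new ArrayList<>();
long sum = 0;
for (Object raw : Arrays.asList(3L, "not-a-number", 7L)) {
  try {
    if (raw instanceof Number) {
      sum += ((Number) raw).longValue();
    } else {
      // The kind of mismatch the comment above describes: the input is not what the aggregation expects.
      throw new ParseException(String.valueOf(raw), "Unparseable numeric value[%s]", raw);
    }
  } catch (ParseException e) {
    parseExceptionMessages.add(e.getMessage()); // mirrors parseExceptionsHolder
  }
}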
use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
the class Plumbers method addNextRow.
public static void addNextRow(
    final Supplier<Committer> committerSupplier,
    final Firehose firehose,
    final Plumber plumber,
    final boolean reportParseExceptions,
    final FireDepartmentMetrics metrics
) throws IOException
{
  final InputRow inputRow;
  try {
    inputRow = firehose.nextRow();
  }
  catch (ParseException e) {
    if (reportParseExceptions) {
      throw e;
    } else {
      log.debug(e, "Discarded row due to exception, considering unparseable.");
      metrics.incrementUnparseable();
      return;
    }
  }
  if (inputRow == null) {
    log.debug("Discarded null row, considering thrownAway.");
    metrics.incrementThrownAway();
    return;
  }
  final IncrementalIndexAddResult addResult;
  try {
    addResult = plumber.add(inputRow, committerSupplier);
  }
  catch (IndexSizeExceededException e) {
    // plumber.add should be swapping out indexes before they fill up.
    throw new ISE(e, "Index size exceeded");
  }
  if (addResult.getRowCount() == -1) {
    metrics.incrementThrownAway();
    log.debug("Discarded row[%s], considering thrownAway due to %s.", inputRow, addResult.getReasonOfNotAdded());
    return;
  }
  if (addResult.getRowCount() == -2) {
    metrics.incrementDedup();
    log.debug("Discarded row[%s], considering duplication.", inputRow);
    return;
  }
  metrics.incrementProcessed();
}
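A hedged sketch of a driver loop around addNextRow() follows; firehose, plumber, committerSupplier, and metrics are assumed to be set up by the ingestion task, and the loop is assumed to run where IOException is handled. With reportParseExceptions set to false, unparseable rows are only counted in the metrics instead of failing ingestion.

// Hedged sketch; all referenced objects are assumed to be provided by the task.
while (firehose.hasMore()) {
  Plumbers.addNextRow(committerSupplier, firehose, plumber, false, metrics);
}
// Hand off whatever was indexed (persist(Committer) is assumed to be available on this Plumber).
plumber.persist(committerSupplier.get());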
use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
the class KafkaInputReader method buildBlendedRows.
private CloseableIterator<InputRow> buildBlendedRows(InputEntityReader valueParser, Map<String, Object> headerKeyList) throws IOException
{
  return valueParser.read().map(r -> {
    MapBasedInputRow valueRow;
    try {
      // The value parser is expected to return rows of type MapBasedInputRow;
      // parsers returning other types are currently not compatible.
      valueRow = (MapBasedInputRow) r;
    }
    catch (ClassCastException e) {
      throw new ParseException(null, "Unsupported input format in valueFormat. KafkaInputFormat only supports input format that return MapBasedInputRow rows");
    }
    Map<String, Object> event = new HashMap<>(headerKeyList);
    /* Currently we prefer payload attributes if there is a collision in names.
       This could be made configurable in a later change. The default behavior lets
       existing inputFormats be ported to the new one without any changes. */
    event.putAll(valueRow.getEvent());
    HashSet<String> newDimensions = new HashSet<>(valueRow.getDimensions());
    newDimensions.addAll(headerKeyList.keySet());
    // Remove the dummy timestamp added in KafkaInputFormat
    newDimensions.remove(KafkaInputFormat.DEFAULT_AUTO_TIMESTAMP_STRING);
    return new MapBasedInputRow(inputRowSchema.getTimestampSpec().extractTimestamp(event), getFinalDimensionList(newDimensions), event);
  });
}
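The merge order above means payload attributes win when a header key and a payload key collide. A small, self-contained illustration with made-up keys and values:

// Hedged illustration of the collision behavior; keys and values are made up.
Map<String, Object> headerKeyList = new HashMap<>();
headerKeyList.put("kafka.header.env", "prod");
headerKeyList.put("source", "kafka-header");   // collides with a payload attribute

Map<String, Object> payload = new HashMap<>();
payload.put("source", "payload");

Map<String, Object> event = new HashMap<>(headerKeyList);
event.putAll(payload);
// event.get("source") is now "payload"; event.get("kafka.header.env") is still "prod".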
use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
the class InputSourceSampler method sample.
public SamplerResponse sample(
    final InputSource inputSource,
    // inputFormat can be null only if inputSource.needsFormat() = false or parser is specified.
    @Nullable final InputFormat inputFormat,
    @Nullable final DataSchema dataSchema,
    @Nullable final SamplerConfig samplerConfig
)
{
  Preconditions.checkNotNull(inputSource, "inputSource required");
  if (inputSource.needsFormat()) {
    Preconditions.checkNotNull(inputFormat, "inputFormat required");
  }
  final DataSchema nonNullDataSchema = dataSchema == null ? DEFAULT_DATA_SCHEMA : dataSchema;
  final SamplerConfig nonNullSamplerConfig = samplerConfig == null ? SamplerConfig.empty() : samplerConfig;
  final Closer closer = Closer.create();
  final File tempDir = FileUtils.createTempDir();
  closer.register(() -> FileUtils.deleteDirectory(tempDir));
  try {
    final InputSourceReader reader = buildReader(nonNullSamplerConfig, nonNullDataSchema, inputSource, inputFormat, tempDir);
    try (final CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample();
         final IncrementalIndex index = buildIncrementalIndex(nonNullSamplerConfig, nonNullDataSchema);
         final Closer closer1 = closer) {
      List<SamplerResponseRow> responseRows = new ArrayList<>(nonNullSamplerConfig.getNumRows());
      int numRowsIndexed = 0;
      while (responseRows.size() < nonNullSamplerConfig.getNumRows() && iterator.hasNext()) {
        final InputRowListPlusRawValues inputRowListPlusRawValues = iterator.next();
        final List<Map<String, Object>> rawColumnsList = inputRowListPlusRawValues.getRawValuesList();
        final ParseException parseException = inputRowListPlusRawValues.getParseException();
        if (parseException != null) {
          if (rawColumnsList != null) {
            // add all rows to the response
            responseRows.addAll(
                rawColumnsList.stream()
                              .map(rawColumns -> new SamplerResponseRow(rawColumns, null, true, parseException.getMessage()))
                              .collect(Collectors.toList())
            );
          } else {
            // no data parsed, add one response row
            responseRows.add(new SamplerResponseRow(null, null, true, parseException.getMessage()));
          }
          continue;
        }
        List<InputRow> inputRows = inputRowListPlusRawValues.getInputRows();
        if (inputRows == null) {
          continue;
        }
        for (int i = 0; i < inputRows.size(); i++) {
          // InputRowListPlusRawValues guarantees the sizes of rawColumnsList and inputRows are the same
          Map<String, Object> rawColumns = rawColumnsList == null ? null : rawColumnsList.get(i);
          InputRow row = inputRows.get(i);
          // keep the index of the row to be added to responseRows for further use
          final int rowIndex = responseRows.size();
          IncrementalIndexAddResult addResult = index.add(new SamplerInputRow(row, rowIndex), true);
          if (addResult.hasParseException()) {
            responseRows.add(new SamplerResponseRow(rawColumns, null, true, addResult.getParseException().getMessage()));
          } else {
            // store the raw value; will be merged with the data from the IncrementalIndex later
            responseRows.add(new SamplerResponseRow(rawColumns, null, null, null));
            numRowsIndexed++;
          }
        }
      }
      final List<String> columnNames = index.getColumnNames();
      columnNames.remove(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
      for (Row row : index) {
        Map<String, Object> parsed = new LinkedHashMap<>();
        parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch());
        columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));
        Number sortKey = row.getMetric(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
        if (sortKey != null) {
          responseRows.set(sortKey.intValue(), responseRows.get(sortKey.intValue()).withParsed(parsed));
        }
      }
      // make sure the number of response rows does not exceed the requested number of rows
      if (responseRows.size() > nonNullSamplerConfig.getNumRows()) {
        responseRows = responseRows.subList(0, nonNullSamplerConfig.getNumRows());
      }
      int numRowsRead = responseRows.size();
      return new SamplerResponse(
          numRowsRead,
          numRowsIndexed,
          responseRows.stream()
                      .filter(Objects::nonNull)
                      .filter(x -> x.getParsed() != null || x.isUnparseable() != null)
                      .collect(Collectors.toList())
      );
    }
  }
  catch (Exception e) {
    throw new SamplerException(e, "Failed to sample data: %s", e.getMessage());
  }
}
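A hedged sketch of calling sample() with inline JSON data follows. The sampler variable is an assumed, already-constructed InputSourceSampler, and the JsonInputFormat constructor arity varies across Druid versions, so the object construction below is an assumption rather than the exact API. Passing null for dataSchema and samplerConfig exercises the defaults shown above.

// Hedged usage sketch; `sampler` and the constructor arities are assumptions.
InputSource inputSource = new InlineInputSource("{\"time\":\"2022-01-01T00:00:00Z\",\"x\":1}\n");
InputFormat inputFormat = new JsonInputFormat(null, null, null);
SamplerResponse response = sampler.sample(inputSource, inputFormat, null, null);
// The response carries the SamplerResponseRow list assembled above, including any
// parse exception messages recorded for unparseable rows.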