Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
The class InputRowSerde, method toBytes.
public static SerializeResult toBytes(final Map<String, IndexSerdeTypeHelper> typeHelperMap, final InputRow row, AggregatorFactory[] aggs) {
  try {
    List<String> parseExceptionMessages = new ArrayList<>();
    ByteArrayDataOutput out = ByteStreams.newDataOutput();
    // write timestamp
    out.writeLong(row.getTimestampFromEpoch());
    // write all dimensions
    List<String> dimList = row.getDimensions();
    WritableUtils.writeVInt(out, dimList.size());
    for (String dim : dimList) {
      IndexSerdeTypeHelper typeHelper = typeHelperMap.get(dim);
      if (typeHelper == null) {
        typeHelper = STRING_HELPER;
      }
      writeString(dim, out);
      try {
        typeHelper.serialize(out, row.getRaw(dim));
      } catch (ParseException pe) {
        parseExceptionMessages.add(pe.getMessage());
      }
    }
    // write all metrics
    Supplier<InputRow> supplier = () -> row;
    WritableUtils.writeVInt(out, aggs.length);
    for (AggregatorFactory aggFactory : aggs) {
      String k = aggFactory.getName();
      writeString(k, out);
      try (Aggregator agg = aggFactory.factorize(IncrementalIndex.makeColumnSelectorFactory(VirtualColumns.EMPTY, aggFactory, supplier, true))) {
        try {
          agg.aggregate();
        } catch (ParseException e) {
          // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
          log.debug(e, "Encountered parse error, skipping aggregator[%s].", k);
          parseExceptionMessages.add(e.getMessage());
        }
        final ColumnType type = aggFactory.getIntermediateType();
        if (agg.isNull()) {
          out.writeByte(NullHandling.IS_NULL_BYTE);
        } else {
          out.writeByte(NullHandling.IS_NOT_NULL_BYTE);
          if (type.is(ValueType.FLOAT)) {
            out.writeFloat(agg.getFloat());
          } else if (type.is(ValueType.LONG)) {
            WritableUtils.writeVLong(out, agg.getLong());
          } else if (type.is(ValueType.DOUBLE)) {
            out.writeDouble(agg.getDouble());
          } else if (type.is(ValueType.COMPLEX)) {
            Object val = agg.get();
            ComplexMetricSerde serde = getComplexMetricSerde(type.getComplexTypeName());
            writeBytes(serde.toBytes(val), out);
          } else {
            throw new IAE("Unable to serialize type[%s]", type.asTypeString());
          }
        }
      }
    }
    return new SerializeResult(out.toByteArray(), parseExceptionMessages);
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
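The SerializeResult returned above pairs the serialized bytes with the parse error messages collected along the way. Below is a minimal, self-contained sketch of that same accumulate-and-continue pattern; the class and the coerceToLong helper are hypothetical stand-ins (not part of InputRowSerde), showing how each unparseable value adds a message instead of aborting the whole row.

import java.util.ArrayList;
import java.util.List;
import org.apache.druid.java.util.common.parsers.ParseException;

// Sketch of the accumulate-and-continue pattern used in toBytes().
// "coerceToLong" is a hypothetical stand-in for typeHelper.serialize(out, row.getRaw(dim)).
public class ParseErrorAccumulationSketch
{
  public static List<String> serializeFields(List<Object> rawValues)
  {
    final List<String> parseExceptionMessages = new ArrayList<>();
    for (Object raw : rawValues) {
      try {
        coerceToLong(raw);
      } catch (ParseException pe) {
        // Record the problem and keep going, exactly as toBytes() does per dimension.
        parseExceptionMessages.add(pe.getMessage());
      }
    }
    return parseExceptionMessages;
  }

  private static long coerceToLong(Object raw)
  {
    if (raw instanceof Number) {
      return ((Number) raw).longValue();
    }
    // Same two-argument (input, message) constructor used in the excerpts on this page.
    throw new ParseException(null, "Could not convert value [" + raw + "] to long");
  }
}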
Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
The class HadoopDruidIndexerMapper, method map.
@Override
protected void map(Object key, Object value, Context context) throws IOException, InterruptedException {
  try {
    final List<InputRow> inputRows = parseInputRow(value, parser);
    for (InputRow inputRow : inputRows) {
      try {
        if (inputRow == null) {
          // Throw away null rows from the parser.
          log.debug("Throwing away row [%s]", value);
          context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).increment(1);
          continue;
        }
        if (!Intervals.ETERNITY.contains(inputRow.getTimestamp())) {
          final String errorMsg = StringUtils.format("Encountered row with timestamp that cannot be represented as a long: [%s]", inputRow);
          throw new ParseException(null, errorMsg);
        }
        if (granularitySpec.inputIntervals().isEmpty() || granularitySpec.bucketInterval(DateTimes.utc(inputRow.getTimestampFromEpoch())).isPresent()) {
          innerMap(inputRow, context);
        } else {
          context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).increment(1);
        }
      } catch (ParseException pe) {
        handleParseException(pe, context);
      }
    }
  } catch (ParseException pe) {
    handleParseException(pe, context);
  } catch (RuntimeException e) {
    throw new RE(e, "Failure on row[%s]", value);
  }
}
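Note how a row that fails the timestamp sanity check is converted into a ParseException so it takes the same handleParseException path as any other unparseable row. A minimal, self-contained sketch of that pattern follows; the class name, method, and bounds are illustrative rather than Druid's actual Intervals.ETERNITY implementation.

import org.apache.druid.java.util.common.parsers.ParseException;

// Sketch: reject out-of-range timestamps by raising ParseException so they are routed to the
// parse-error handler rather than crashing the mapper. Names and bounds are illustrative.
public class TimestampValidationSketch
{
  // Druid's "eternity" interval spans roughly Long.MIN_VALUE / 2 .. Long.MAX_VALUE / 2 millis.
  private static final long MIN_INSTANT = Long.MIN_VALUE / 2;
  private static final long MAX_INSTANT = Long.MAX_VALUE / 2;

  public static void validateTimestamp(long timestampMillis, Object row)
  {
    if (timestampMillis < MIN_INSTANT || timestampMillis > MAX_INSTANT) {
      throw new ParseException(null, "Encountered row with timestamp that cannot be represented as a long: [" + row + "]");
    }
  }
}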
Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
The class IncrementalIndexAddResultTest, method testHasParseException.
@Test
public void testHasParseException() {
  Assert.assertFalse(new IncrementalIndexAddResult(0, 0L).hasParseException());
  Assert.assertFalse(new IncrementalIndexAddResult(0, 0L, "test").hasParseException());
  Assert.assertTrue(new IncrementalIndexAddResult(0, 0L, new ParseException(null, "test")).hasParseException());
}
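Assuming IncrementalIndexAddResult also exposes the stored exception through a getParseException() accessor (as other Druid code that consumes this result does), a follow-up assertion might look like the sketch below; treat it as a hypothetical extension of the test rather than part of it.

@Test
public void testParseExceptionMessageIsPreserved() {
  // Assumes a getParseException() accessor on IncrementalIndexAddResult.
  IncrementalIndexAddResult result = new IncrementalIndexAddResult(0, 0L, new ParseException(null, "test"));
  Assert.assertTrue(result.hasParseException());
  Assert.assertEquals("test", result.getParseException().getMessage());
}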
Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
The class AppenderatorDriverRealtimeIndexTask, method run.
@Override
public TaskStatus run(final TaskToolbox toolbox) {
  runThread = Thread.currentThread();
  authorizerMapper = toolbox.getAuthorizerMapper();
  rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
  parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
  setupTimeoutAlert();
  DataSchema dataSchema = spec.getDataSchema();
  RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
  final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
  final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
  this.metrics = fireDepartmentForMetrics.getMetrics();
  final Supplier<Committer> committerSupplier = Committers.nilSupplier();
  DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
  appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
  TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
  StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
  try {
    log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
    toolbox.getChatHandlerProvider().register(getId(), this, false);
    if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
      toolbox.getDataSegmentServerAnnouncer().announce();
      toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
    }
    driver.startJob(segmentId -> {
      try {
        if (lockGranularity == LockGranularity.SEGMENT) {
          return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
        } else {
          final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
          if (lock == null) {
            return false;
          }
          if (lock.isRevoked()) {
            throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
          }
          return true;
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    });
    // Set up metrics emission
    toolbox.addMonitor(metricsMonitor);
    // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
    final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
    final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
    int sequenceNumber = 0;
    String sequenceName = makeSequenceName(getId(), sequenceNumber);
    final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
      if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
        throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
      }
      if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
        throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
      }
      final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
      return toolbox.getTaskActionClient().submit(action);
    };
    // Skip connecting firehose if we've been stopped before we got started.
    synchronized (this) {
      if (!gracefullyStopped) {
        firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
      }
    }
    ingestionState = IngestionState.BUILD_SEGMENTS;
    // Time to read data!
    while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
      try {
        InputRow inputRow = firehose.nextRow();
        if (inputRow == null) {
          log.debug("Discarded null row, considering thrownAway.");
          rowIngestionMeters.incrementThrownAway();
        } else {
          AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
          if (addResult.isOk()) {
            final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
            if (isPushRequired) {
              publishSegments(driver, publisher, committerSupplier, sequenceName);
              sequenceNumber++;
              sequenceName = makeSequenceName(getId(), sequenceNumber);
            }
          } else {
            // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
            throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
          }
        }
      } catch (ParseException e) {
        handleParseException(e);
      }
    }
    ingestionState = IngestionState.COMPLETED;
    if (!gracefullyStopped) {
      synchronized (this) {
        if (gracefullyStopped) {
          // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
          log.info("Gracefully stopping.");
        } else {
          finishingJob = true;
        }
      }
      if (finishingJob) {
        log.info("Finishing job...");
        // Publish any remaining segments
        publishSegments(driver, publisher, committerSupplier, sequenceName);
        waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
      }
    } else if (firehose != null) {
      log.info("Task was gracefully stopped, will persist data before exiting");
      persistAndWait(driver, committerSupplier.get());
    }
  } catch (Throwable e) {
    log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
    errorMsg = Throwables.getStackTraceAsString(e);
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.failure(getId(), errorMsg);
  } finally {
    toolbox.getChatHandlerProvider().unregister(getId());
    CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
    appenderator.close();
    CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
    toolbox.removeMonitor(metricsMonitor);
    if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
      toolbox.getDataSegmentServerAnnouncer().unannounce();
      toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
    }
  }
  log.info("Job done!");
  toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
  return TaskStatus.success(getId());
}
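The ParseExceptionHandler built at the top of run() is configured with logParseExceptions, maxParseExceptions, and maxSavedParseExceptions. The sketch below is not the real handler; it is a rough, self-contained illustration of what that policy amounts to: count unparseable rows, optionally log them, keep a bounded list of recent messages, and fail once the limit is exceeded.

import java.util.ArrayDeque;
import java.util.Deque;
import org.apache.druid.java.util.common.parsers.ParseException;

// Hypothetical policy sketch, not Druid's ParseExceptionHandler implementation.
public class ParseExceptionPolicySketch
{
  private final boolean logParseExceptions;
  private final int maxParseExceptions;
  private final int maxSavedParseExceptions;
  private final Deque<String> savedMessages = new ArrayDeque<>();
  private long unparseableCount = 0;

  public ParseExceptionPolicySketch(boolean logParseExceptions, int maxParseExceptions, int maxSavedParseExceptions)
  {
    this.logParseExceptions = logParseExceptions;
    this.maxParseExceptions = maxParseExceptions;
    this.maxSavedParseExceptions = maxSavedParseExceptions;
  }

  public void handle(ParseException e)
  {
    unparseableCount++;
    if (logParseExceptions) {
      System.err.println("Encountered parse exception: " + e.getMessage());
    }
    if (savedMessages.size() >= maxSavedParseExceptions) {
      // Keep only the most recent messages.
      savedMessages.pollFirst();
    }
    savedMessages.addLast(e.getMessage());
    if (unparseableCount > maxParseExceptions) {
      // The real task would transition to a failed TaskStatus; here we just rethrow.
      throw new RuntimeException("Max parse exceptions exceeded", e);
    }
  }
}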
Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.
The class IncrementalIndex, method toIncrementalIndexRow.
@VisibleForTesting
IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) {
  row = formatRow(row);
  if (row.getTimestampFromEpoch() < minTimestamp) {
    throw new IAE("Cannot add row[%s] because it is below the minTimestamp[%s]", row, DateTimes.utc(minTimestamp));
  }
  final List<String> rowDimensions = row.getDimensions();
  Object[] dims;
  List<Object> overflow = null;
  long dimsKeySize = 0;
  List<String> parseExceptionMessages = new ArrayList<>();
  synchronized (dimensionDescs) {
    // all known dimensions are assumed missing until we encounter them in the rowDimensions
    Set<String> absentDimensions = Sets.newHashSet(dimensionDescs.keySet());
    // first, process dimension values present in the row
    dims = new Object[dimensionDescs.size()];
    for (String dimension : rowDimensions) {
      if (Strings.isNullOrEmpty(dimension)) {
        continue;
      }
      boolean wasNewDim = false;
      DimensionDesc desc = dimensionDescs.get(dimension);
      if (desc != null) {
        absentDimensions.remove(dimension);
      } else {
        wasNewDim = true;
        desc = addNewDimension(dimension, DimensionHandlerUtils.getHandlerFromCapabilities(dimension, // based on the value to use a better handler
            makeDefaultCapabilitiesFromValueType(ColumnType.STRING), null));
      }
      DimensionIndexer indexer = desc.getIndexer();
      Object dimsKey = null;
      try {
        final EncodedKeyComponent<?> encodedKeyComponent = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension), true);
        dimsKey = encodedKeyComponent.getComponent();
        dimsKeySize += encodedKeyComponent.getEffectiveSizeBytes();
      } catch (ParseException pe) {
        parseExceptionMessages.add(pe.getMessage());
      }
      if (wasNewDim) {
        // unless this is the first row we are processing, all newly discovered columns will be sparse
        if (maxIngestedEventTime != null) {
          indexer.setSparseIndexed();
        }
        if (overflow == null) {
          overflow = new ArrayList<>();
        }
        overflow.add(dimsKey);
      } else if (desc.getIndex() > dims.length || dims[desc.getIndex()] != null) {
        /*
         * index > dims.length requires that we saw this dimension and added it to the dimensionOrder map,
         * otherwise index is null. Since dims is initialized based on the size of dimensionOrder on each call to add,
         * it must have been added to dimensionOrder during this InputRow.
         *
         * If we found an index for this dimension, it means we've seen it already. If !(index > dims.length), then
         * we saw it on a previous input row (thus it's safe to index into dims). If we found a value in
         * the dims array for this index, it means we have seen this dimension already on this input row.
         */
        throw new ISE("Dimension[%s] occurred more than once in InputRow", dimension);
      } else {
        dims[desc.getIndex()] = dimsKey;
      }
    }
    // process any dimensions with missing values in the row
    for (String missing : absentDimensions) {
      dimensionDescs.get(missing).getIndexer().setSparseIndexed();
    }
  }
  if (overflow != null) {
    // Merge overflow and non-overflow
    Object[] newDims = new Object[dims.length + overflow.size()];
    System.arraycopy(dims, 0, newDims, 0, dims.length);
    for (int i = 0; i < overflow.size(); ++i) {
      newDims[dims.length + i] = overflow.get(i);
    }
    dims = newDims;
  }
  long truncated = 0;
  if (row.getTimestamp() != null) {
    truncated = gran.bucketStart(row.getTimestampFromEpoch());
  }
  IncrementalIndexRow incrementalIndexRow = IncrementalIndexRow.createTimeAndDimswithDimsKeySize(Math.max(truncated, minTimestamp), dims, dimensionDescsList, dimsKeySize);
  return new IncrementalIndexRowResult(incrementalIndexRow, parseExceptionMessages);
}
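The per-dimension messages collected above are returned in the IncrementalIndexRowResult rather than thrown immediately; downstream code can fold them into a single ParseException for the row. The sketch below shows one way such a combining step could look; the class and method names are illustrative, not the actual IncrementalIndex helper.

import java.util.List;
import javax.annotation.Nullable;
import org.apache.druid.java.util.common.parsers.ParseException;

// Sketch of combining accumulated per-dimension messages into one ParseException.
public class CombinedParseExceptionSketch
{
  @Nullable
  public static ParseException combine(Object row, List<String> parseExceptionMessages)
  {
    if (parseExceptionMessages.isEmpty()) {
      // Nothing went wrong for this row.
      return null;
    }
    return new ParseException(
        null,
        "Found unparseable columns in row: [" + row + "], exceptions: " + String.join(", ", parseExceptionMessages)
    );
  }
}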