Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.
From the class OffheapIncrementalIndex, the method addToFacts:
@Override
protected Integer addToFacts(
    AggregatorFactory[] metrics,
    boolean deserializeComplexMetrics,
    boolean reportParseExceptions,
    InputRow row,
    AtomicInteger numEntries,
    TimeAndDims key,
    ThreadLocal<InputRow> rowContainer,
    Supplier<InputRow> rowSupplier
) throws IndexSizeExceededException {
  ByteBuffer aggBuffer;
  int bufferIndex;
  int bufferOffset;

  synchronized (this) {
    final Integer priorIndex = facts.getPriorIndex(key);
    if (null != priorIndex) {
      final int[] indexAndOffset = indexAndOffsets.get(priorIndex);
      bufferIndex = indexAndOffset[0];
      bufferOffset = indexAndOffset[1];
      aggBuffer = aggBuffers.get(bufferIndex).get();
    } else {
      if (metrics.length > 0 && getAggs()[0] == null) {
        // Note: creation of Aggregators is done lazily, once at least one input row is available,
        // so that FilteredAggregators can be initialized correctly.
        rowContainer.set(row);
        for (int i = 0; i < metrics.length; i++) {
          final AggregatorFactory agg = metrics[i];
          getAggs()[i] = agg.factorizeBuffered(makeColumnSelectorFactory(agg, rowSupplier, deserializeComplexMetrics));
        }
        rowContainer.set(null);
      }

      bufferIndex = aggBuffers.size() - 1;
      ByteBuffer lastBuffer = aggBuffers.isEmpty() ? null : aggBuffers.get(aggBuffers.size() - 1).get();
      int[] lastAggregatorsIndexAndOffset = indexAndOffsets.isEmpty() ? null : indexAndOffsets.get(indexAndOffsets.size() - 1);

      if (lastAggregatorsIndexAndOffset != null && lastAggregatorsIndexAndOffset[0] != bufferIndex) {
        throw new ISE("last row's aggregate's buffer and last buffer index must be same");
      }

      bufferOffset = aggsTotalSize + (lastAggregatorsIndexAndOffset != null ? lastAggregatorsIndexAndOffset[1] : 0);
      if (lastBuffer != null && lastBuffer.capacity() - bufferOffset >= aggsTotalSize) {
        aggBuffer = lastBuffer;
      } else {
        ResourceHolder<ByteBuffer> bb = bufferPool.take();
        aggBuffers.add(bb);
        bufferIndex = aggBuffers.size() - 1;
        bufferOffset = 0;
        aggBuffer = bb.get();
      }

      for (int i = 0; i < metrics.length; i++) {
        getAggs()[i].init(aggBuffer, bufferOffset + aggOffsetInBuffer[i]);
      }

      // Last-ditch sanity checks
      if (numEntries.get() >= maxRowCount && facts.getPriorIndex(key) == null) {
        throw new IndexSizeExceededException("Maximum number of rows [%d] reached", maxRowCount);
      }

      final Integer rowIndex = indexIncrement.getAndIncrement();

      // Note that indexAndOffsets must be updated before facts, because as soon as we update facts,
      // concurrent readers get hold of it and might ask for the newly added row.
      indexAndOffsets.add(new int[]{bufferIndex, bufferOffset});
      final Integer prev = facts.putIfAbsent(key, rowIndex);
      if (null == prev) {
        numEntries.incrementAndGet();
      } else {
        throw new ISE("WTF! We are in a synchronized block.");
      }
    }
  }

  rowContainer.set(row);

  for (int i = 0; i < metrics.length; i++) {
    final BufferAggregator agg = getAggs()[i];
    synchronized (agg) {
      try {
        agg.aggregate(aggBuffer, bufferOffset + aggOffsetInBuffer[i]);
      } catch (ParseException e) {
        // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
        if (reportParseExceptions) {
          throw new ParseException(e, "Encountered parse error for aggregator[%s]", getMetricAggs()[i].getName());
        } else {
          log.debug(e, "Encountered parse error, skipping aggregator[%s].", getMetricAggs()[i].getName());
        }
      }
    }
  }

  rowContainer.set(null);
  return numEntries.get();
}
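The error-handling convention in addToFacts is: a ParseException thrown by an aggregator is rethrown with the aggregator's name when reportParseExceptions is enabled, and logged at debug level and skipped otherwise. Below is a minimal, self-contained sketch of that convention, not part of the Druid code; the class ParseErrorHandlingSketch and the helper name aggregateOrSkip are made up for illustration, and the Logger import assumes the io.druid.java.util.common.logger.Logger that Druid classes use elsewhere.

import io.druid.java.util.common.logger.Logger;
import io.druid.java.util.common.parsers.ParseException;
import io.druid.query.aggregation.BufferAggregator;

import java.nio.ByteBuffer;

// Sketch only: report-or-skip handling of ParseException around a buffer aggregator,
// mirroring the catch block in addToFacts above.
public class ParseErrorHandlingSketch {
  private static final Logger log = new Logger(ParseErrorHandlingSketch.class);

  static void aggregateOrSkip(
      BufferAggregator agg,
      ByteBuffer buf,
      int position,
      String aggName,
      boolean reportParseExceptions
  ) {
    try {
      agg.aggregate(buf, position);
    } catch (ParseException e) {
      if (reportParseExceptions) {
        // strict mode: surface the offending aggregator and fail the row
        throw new ParseException(e, "Encountered parse error for aggregator[%s]", aggName);
      }
      // lenient mode: leave this aggregator's slot untouched and keep going
      log.debug(e, "Encountered parse error, skipping aggregator[%s].", aggName);
    }
  }
}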
Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.
From the class OnheapIncrementalIndex, the method doAggregate:
private void doAggregate(
    AggregatorFactory[] metrics,
    Aggregator[] aggs,
    ThreadLocal<InputRow> rowContainer,
    InputRow row,
    boolean reportParseExceptions
) {
  rowContainer.set(row);

  for (int i = 0; i < aggs.length; i++) {
    final Aggregator agg = aggs[i];
    synchronized (agg) {
      try {
        agg.aggregate();
      } catch (ParseException e) {
        // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
        if (reportParseExceptions) {
          throw new ParseException(e, "Encountered parse error for aggregator[%s]", metrics[i].getName());
        } else {
          log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName());
        }
      }
    }
  }

  rowContainer.set(null);
}
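In doAggregate, the ThreadLocal<InputRow> rowContainer is the hand-off point between the index and the column selectors created at factorize time: the row is installed before Aggregator.aggregate() runs and cleared afterwards so selectors never read a stale row. The following sketch is illustrative only, not Druid code; the "selector" here is a hypothetical method that reads a metric from whichever row is currently installed.

import io.druid.data.input.InputRow;

// Sketch only: a selector bound to a ThreadLocal row, mirroring how
// rowSupplier/rowContainer cooperate in doAggregate above.
public class ThreadLocalRowSketch {
  private final ThreadLocal<InputRow> rowContainer = new ThreadLocal<>();

  // Hypothetical "selector": reads a metric from the currently installed row.
  long readLongMetric(String metricName) {
    InputRow row = rowContainer.get();
    return row == null ? 0L : row.getLongMetric(metricName);
  }

  void aggregateRow(InputRow row, String metricName, long[] accumulator) {
    rowContainer.set(row);          // make the row visible to selectors
    try {
      accumulator[0] += readLongMetric(metricName);
    } finally {
      rowContainer.set(null);       // clear so no selector sees a stale row
    }
  }
}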
Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.
From the class SpatialDimensionRowTransformer, the method apply:
@Override
public InputRow apply(final InputRow row) {
  final Map<String, List<String>> spatialLookup = Maps.newHashMap();

  // remove all spatial dimensions
  final List<String> finalDims = Lists.newArrayList(Iterables.filter(row.getDimensions(), new Predicate<String>() {
    @Override
    public boolean apply(String input) {
      return !spatialDimensionMap.containsKey(input) && !spatialPartialDimNames.contains(input);
    }
  }));

  InputRow retVal = new InputRow() {
    @Override
    public List<String> getDimensions() {
      return finalDims;
    }

    @Override
    public long getTimestampFromEpoch() {
      return row.getTimestampFromEpoch();
    }

    @Override
    public DateTime getTimestamp() {
      return row.getTimestamp();
    }

    @Override
    public List<String> getDimension(String dimension) {
      List<String> retVal = spatialLookup.get(dimension);
      return (retVal == null) ? row.getDimension(dimension) : retVal;
    }

    @Override
    public Object getRaw(String dimension) {
      List<String> retVal = spatialLookup.get(dimension);
      return (retVal == null) ? row.getRaw(dimension) : retVal;
    }

    @Override
    public long getLongMetric(String metric) {
      try {
        return row.getLongMetric(metric);
      } catch (ParseException e) {
        throw Throwables.propagate(e);
      }
    }

    @Override
    public float getFloatMetric(String metric) {
      try {
        return row.getFloatMetric(metric);
      } catch (ParseException e) {
        throw Throwables.propagate(e);
      }
    }

    @Override
    public String toString() {
      return row.toString();
    }

    @Override
    public int compareTo(Row o) {
      return getTimestamp().compareTo(o.getTimestamp());
    }
  };

  for (Map.Entry<String, SpatialDimensionSchema> entry : spatialDimensionMap.entrySet()) {
    final String spatialDimName = entry.getKey();
    final SpatialDimensionSchema spatialDim = entry.getValue();
    List<String> dimVals = row.getDimension(spatialDimName);
    if (dimVals != null && !dimVals.isEmpty()) {
      if (dimVals.size() != 1) {
        throw new ISE("Spatial dimension value must be in an array!");
      }
      if (isJoinedSpatialDimValValid(dimVals.get(0))) {
        spatialLookup.put(spatialDimName, dimVals);
        finalDims.add(spatialDimName);
      }
    } else {
      List<String> spatialDimVals = Lists.newArrayList();
      for (String dim : spatialDim.getDims()) {
        List<String> partialDimVals = row.getDimension(dim);
        if (isSpatialDimValsValid(partialDimVals)) {
          spatialDimVals.addAll(partialDimVals);
        }
      }
      if (spatialDimVals.size() == spatialDim.getDims().size()) {
        spatialLookup.put(spatialDimName, Arrays.asList(JOINER.join(spatialDimVals)));
        finalDims.add(spatialDimName);
      }
    }
  }

  return retVal;
}
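The transformer either accepts a value that is already a single joined coordinate or joins the configured partial dimensions itself in the else-branch above. A small illustration of that join step follows; it is not Druid code, it assumes JOINER is a comma Joiner, and the latitude/longitude values are made up for the example.

import com.google.common.base.Joiner;

import java.util.Arrays;
import java.util.List;

// Illustration only: joining two partial spatial dimensions into one value,
// the way the else-branch above builds spatialDimVals and calls JOINER.join(...).
public class SpatialJoinSketch {
  private static final Joiner JOINER = Joiner.on(",");  // assumption: comma separator

  public static void main(String[] args) {
    List<String> partialDimVals = Arrays.asList("37.7749", "-122.4194");  // e.g. lat, long
    String joined = JOINER.join(partialDimVals);
    // The transformed row would then report ["37.7749,-122.4194"] for the spatial dimension.
    System.out.println(joined);
  }
}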
Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.
From the class StringArrayWritable, the method toBytes:
public static final byte[] toBytes(final InputRow row, AggregatorFactory[] aggs, boolean reportParseExceptions) {
  try {
    ByteArrayDataOutput out = ByteStreams.newDataOutput();

    // write the timestamp
    out.writeLong(row.getTimestampFromEpoch());

    // write all dimensions
    List<String> dimList = row.getDimensions();
    WritableUtils.writeVInt(out, dimList == null ? 0 : dimList.size());
    if (dimList != null) {
      for (String dim : dimList) {
        List<String> dimValues = row.getDimension(dim);
        writeString(dim, out);
        writeStringArray(dimValues, out);
      }
    }

    // write all metrics
    Supplier<InputRow> supplier = new Supplier<InputRow>() {
      @Override
      public InputRow get() {
        return row;
      }
    };
    WritableUtils.writeVInt(out, aggs.length);
    for (AggregatorFactory aggFactory : aggs) {
      String k = aggFactory.getName();
      writeString(k, out);
      Aggregator agg = aggFactory.factorize(IncrementalIndex.makeColumnSelectorFactory(VirtualColumns.EMPTY, aggFactory, supplier, true));
      try {
        agg.aggregate();
      } catch (ParseException e) {
        // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
        if (reportParseExceptions) {
          throw new ParseException(e, "Encountered parse error for aggregator[%s]", k);
        }
        log.debug(e, "Encountered parse error, skipping aggregator[%s].", k);
      }

      String t = aggFactory.getTypeName();
      if (t.equals("float")) {
        out.writeFloat(agg.getFloat());
      } else if (t.equals("long")) {
        WritableUtils.writeVLong(out, agg.getLong());
      } else {
        // it's a complex metric
        Object val = agg.get();
        ComplexMetricSerde serde = getComplexMetricSerde(t);
        writeBytes(serde.toBytes(val), out);
      }
    }

    return out.toByteArray();
  } catch (IOException ex) {
    throw Throwables.propagate(ex);
  }
}
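A hedged usage sketch of the serializer above: build a simple map-backed row and serialize it with a single count aggregator. MapBasedInputRow and CountAggregatorFactory are standard Druid classes; the call goes through StringArrayWritable only because that is the class the heading names, and the dimension and metric names are invented for the example.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;

// Usage sketch only: serialize one row with one count aggregator.
public class ToBytesUsageSketch {
  public static void main(String[] args) {
    InputRow row = new MapBasedInputRow(
        System.currentTimeMillis(),
        ImmutableList.of("page", "language"),
        ImmutableMap.<String, Object>of("page", "Main_Page", "language", "en", "count", 1L)
    );
    AggregatorFactory[] aggs = new AggregatorFactory[]{new CountAggregatorFactory("count")};

    // reportParseExceptions = false: a parse error would be logged and the aggregator skipped.
    byte[] serialized = StringArrayWritable.toBytes(row, aggs, false);
    System.out.println("serialized " + serialized.length + " bytes");
  }
}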
Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.
From the class IndexTask, the method generateAndPublishSegments:
private boolean generateAndPublishSegments(
    final TaskToolbox toolbox,
    final DataSchema dataSchema,
    final Map<Interval, List<ShardSpec>> shardSpecs,
    final String version,
    final FirehoseFactory firehoseFactory
) throws IOException, InterruptedException {
  final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
  final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null);
  final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
  final Map<String, ShardSpec> sequenceNameToShardSpecMap = Maps.newHashMap();

  if (toolbox.getMonitorScheduler() != null) {
    toolbox.getMonitorScheduler().addMonitor(
        new RealtimeMetricsMonitor(
            ImmutableList.of(fireDepartmentForMetrics),
            ImmutableMap.of(DruidMetrics.TASK_ID, new String[]{getId()})
        )
    );
  }

  final SegmentAllocator segmentAllocator;
  if (ingestionSchema.getIOConfig().isAppendToExisting()) {
    segmentAllocator = new ActionBasedSegmentAllocator(toolbox.getTaskActionClient(), dataSchema);
  } else {
    segmentAllocator = new SegmentAllocator() {
      @Override
      public SegmentIdentifier allocate(DateTime timestamp, String sequenceName, String previousSegmentId) throws IOException {
        Optional<Interval> interval = granularitySpec.bucketInterval(timestamp);
        if (!interval.isPresent()) {
          throw new ISE("Could not find interval for timestamp [%s]", timestamp);
        }
        ShardSpec shardSpec = sequenceNameToShardSpecMap.get(sequenceName);
        if (shardSpec == null) {
          throw new ISE("Could not find ShardSpec for sequenceName [%s]", sequenceName);
        }
        return new SegmentIdentifier(getDataSource(), interval.get(), version, shardSpec);
      }
    };
  }

  try (
      final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, toolbox, dataSchema);
      final FiniteAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator, fireDepartmentMetrics);
      final Firehose firehose = firehoseFactory.connect(dataSchema.getParser())
  ) {
    final Supplier<Committer> committerSupplier = Committers.supplierFromFirehose(firehose);
    final Map<Interval, ShardSpecLookup> shardSpecLookups = Maps.newHashMap();

    if (driver.startJob() != null) {
      driver.clear();
    }

    try {
      while (firehose.hasMore()) {
        try {
          final InputRow inputRow = firehose.nextRow();
          final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
          if (!optInterval.isPresent()) {
            fireDepartmentMetrics.incrementThrownAway();
            continue;
          }

          final Interval interval = optInterval.get();
          if (!shardSpecLookups.containsKey(interval)) {
            final List<ShardSpec> intervalShardSpecs = shardSpecs.get(interval);
            if (intervalShardSpecs == null || intervalShardSpecs.isEmpty()) {
              throw new ISE("Failed to get shardSpec for interval[%s]", interval);
            }
            shardSpecLookups.put(interval, intervalShardSpecs.get(0).getLookup(intervalShardSpecs));
          }

          final ShardSpec shardSpec = shardSpecLookups.get(interval).getShardSpec(inputRow.getTimestampFromEpoch(), inputRow);
          final String sequenceName = String.format("index_%s_%s_%d", interval, version, shardSpec.getPartitionNum());

          if (!sequenceNameToShardSpecMap.containsKey(sequenceName)) {
            final ShardSpec shardSpecForPublishing =
                ingestionSchema.getTuningConfig().isForceExtendableShardSpecs() || ingestionSchema.getIOConfig().isAppendToExisting()
                ? new NumberedShardSpec(shardSpec.getPartitionNum(), shardSpecs.get(interval).size())
                : shardSpec;
            sequenceNameToShardSpecMap.put(sequenceName, shardSpecForPublishing);
          }

          final SegmentIdentifier identifier = driver.add(inputRow, sequenceName, committerSupplier);
          if (identifier == null) {
            throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
          }

          fireDepartmentMetrics.incrementProcessed();
        } catch (ParseException e) {
          if (ingestionSchema.getTuningConfig().isReportParseExceptions()) {
            throw e;
          } else {
            fireDepartmentMetrics.incrementUnparseable();
          }
        }
      }
    } finally {
      driver.persist(committerSupplier.get());
    }

    final TransactionalSegmentPublisher publisher = new TransactionalSegmentPublisher() {
      @Override
      public boolean publishSegments(Set<DataSegment> segments, Object commitMetadata) throws IOException {
        final SegmentTransactionalInsertAction action = new SegmentTransactionalInsertAction(segments, null, null);
        return toolbox.getTaskActionClient().submit(action).isSuccess();
      }
    };

    final SegmentsAndMetadata published = driver.finish(publisher, committerSupplier.get());
    if (published == null) {
      log.error("Failed to publish segments, aborting!");
      return false;
    } else {
      log.info("Published segments[%s]", Joiner.on(", ").join(Iterables.transform(published.getSegments(), new Function<DataSegment, String>() {
        @Override
        public String apply(DataSegment input) {
          return input.getIdentifier();
        }
      })));
      return true;
    }
  }
}
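The ingestion loop above decides per row whether a ParseException aborts the task (isReportParseExceptions() true) or is merely counted as unparseable. Below is a standalone sketch of just that decision, not Druid code: the tuning-config lookup is replaced by a plain boolean parameter, and the firehose, driver, and metrics are reduced to simple stand-ins.

import io.druid.java.util.common.parsers.ParseException;

import java.util.Iterator;
import java.util.concurrent.atomic.AtomicLong;

// Sketch only: the strict-vs-lenient row loop from generateAndPublishSegments.
public class ParseExceptionLoopSketch {
  static void ingest(Iterator<Runnable> rows, boolean reportParseExceptions, AtomicLong unparseable) {
    while (rows.hasNext()) {
      try {
        rows.next().run();               // stand-in for nextRow() + driver.add(...)
      } catch (ParseException e) {
        if (reportParseExceptions) {
          throw e;                       // strict: fail the whole task on the first bad row
        }
        unparseable.incrementAndGet();   // lenient: count it and keep ingesting
      }
    }
  }
}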