/**
 * Aggregates one row's metrics into the off-heap style aggregation buffer slot that belongs to {@code key}
 * and returns the current value of {@code numEntries}.
 *
 * <p>While holding the monitor of {@code this}, the method asks {@code facts} for a prior row index of
 * {@code key}. If one exists, the buffer index and offset recorded in {@code indexAndOffsets} are reused.
 * Otherwise the buffer aggregators are created lazily (first row only), a slot is chosen either at the end
 * of the last buffer or in a buffer taken from {@code bufferPool}, every aggregator is initialised at that
 * slot, and the new row index is registered in {@code indexAndOffsets} and then in {@code facts}.
 * Finally each aggregator's {@code aggregate} is invoked on the slot.
 *
 * <p>NOTE(review): this listing has no closing braces and appears to omit some statements, so the exact
 * nesting of the statements below cannot be confirmed from here — check against the full file.
 *
 * @param metrics aggregator factories; one buffer aggregator is created per entry
 * @param deserializeComplexMetrics forwarded to {@code makeColumnSelectorFactory}
 * @param reportParseExceptions when true a {@link ParseException} from an aggregator is rethrown (wrapped);
 *        when false it is logged at debug level
 * @param row not referenced by any line visible in this listing
 * @param numEntries read for the row-count limit check and for the return value
 * @param key the fact key whose aggregation slot is looked up or created
 * @param rowContainer not referenced by any line visible in this listing
 * @param rowSupplier forwarded to {@code makeColumnSelectorFactory}
 * @return {@code numEntries.get()}
 * @throws IndexSizeExceededException if {@code numEntries.get() >= maxRowCount} and {@code facts} has no
 *         prior index for {@code key}
 */
protected Integer addToFacts(AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, InputRow row, AtomicInteger numEntries, TimeAndDims key, ThreadLocal<InputRow> rowContainer, Supplier<InputRow> rowSupplier) throws IndexSizeExceededException {
// Location of this key's aggregation slot: which buffer, and the byte offset inside it.
ByteBuffer aggBuffer;
int bufferIndex;
int bufferOffset;
synchronized (this) {
final Integer priorIndex = facts.getPriorIndex(key);
if (null != priorIndex) {
// Key already known: reuse the {bufferIndex, bufferOffset} pair stored for its row index.
final int[] indexAndOffset = indexAndOffsets.get(priorIndex);
bufferIndex = indexAndOffset[0];
bufferOffset = indexAndOffset[1];
aggBuffer = aggBuffers.get(bufferIndex).get();
} else {
// A null first element is used as the "aggregators not created yet" marker.
if (metrics.length > 0 && getAggs()[0] == null) {
// note: creation of Aggregators is done lazily when at least one row from input is available
// so that FilteredAggregators could be initialized correctly.
for (int i = 0; i < metrics.length; i++) {
final AggregatorFactory agg = metrics[i];
getAggs()[i] = agg.factorizeBuffered(makeColumnSelectorFactory(agg, rowSupplier, deserializeComplexMetrics));
// Tentatively target the last existing buffer (-1 when there is none yet).
bufferIndex = aggBuffers.size() - 1;
ByteBuffer lastBuffer = aggBuffers.isEmpty() ? null : aggBuffers.get(aggBuffers.size() - 1).get();
int[] lastAggregatorsIndexAndOffset = indexAndOffsets.isEmpty() ? null : indexAndOffsets.get(indexAndOffsets.size() - 1);
// Sanity check: the most recently added row must live in the last buffer.
if (lastAggregatorsIndexAndOffset != null && lastAggregatorsIndexAndOffset[0] != bufferIndex) {
throw new ISE("last row's aggregate's buffer and last buffer index must be same");
// Candidate offset is one slot (aggsTotalSize bytes) past the last row's offset.
bufferOffset = aggsTotalSize + (lastAggregatorsIndexAndOffset != null ? lastAggregatorsIndexAndOffset[1] : 0);
// Reuse the last buffer only if a whole slot still fits after the candidate offset.
if (lastBuffer != null && lastBuffer.capacity() - bufferOffset >= aggsTotalSize) {
aggBuffer = lastBuffer;
} else {
// Otherwise take a new buffer from the pool and start at its beginning.
// NOTE(review): no visible line adds `bb` to aggBuffers before bufferIndex is recomputed — confirm
// against the full file that the new buffer is registered first.
ResourceHolder<ByteBuffer> bb = bufferPool.take();
bufferIndex = aggBuffers.size() - 1;
bufferOffset = 0;
aggBuffer = bb.get();
// Initialise every aggregator's region inside the new slot.
for (int i = 0; i < metrics.length; i++) {
getAggs()[i].init(aggBuffer, bufferOffset + aggOffsetInBuffer[i]);
// Last ditch sanity checks
if (numEntries.get() >= maxRowCount && facts.getPriorIndex(key) == null) {
throw new IndexSizeExceededException("Maximum number of rows [%d] reached", maxRowCount);
final Integer rowIndex = indexIncrement.getAndIncrement();
// note that indexAndOffsets must be updated before facts, because as soon as we update facts
// concurrent readers get hold of it and might ask for newly added row
indexAndOffsets.add(new int[] { bufferIndex, bufferOffset });
final Integer prev = facts.putIfAbsent(key, rowIndex);
// NOTE(review): the success branch below is empty in this listing; a statement may be missing here.
if (null == prev) {
} else {
// A previous mapping here would mean another writer inserted the key despite the lock.
throw new ISE("WTF! we are in sychronized block.");
// Feed the row into each aggregator at its region of the slot, locking per aggregator.
for (int i = 0; i < metrics.length; i++) {
final BufferAggregator agg = getAggs()[i];
synchronized (agg) {
try {
agg.aggregate(aggBuffer, bufferOffset + aggOffsetInBuffer[i]);
} catch (ParseException e) {
// "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
if (reportParseExceptions) {
throw new ParseException(e, "Encountered parse error for aggregator[%s]", getMetricAggs()[i].getName());
} else {
log.debug(e, "Encountered parse error, skipping aggregator[%s].", getMetricAggs()[i].getName());
return numEntries.get();
/**
 * Walks over {@code aggs} and, holding each aggregator's monitor in turn, handles any
 * {@link ParseException} raised for it.
 *
 * <p>NOTE(review): the {@code try} body is empty in this listing, so the call that actually performs the
 * aggregation (and any use of {@code rowContainer} / {@code row}) is not visible here — confirm against
 * the full file.
 *
 * @param metrics aggregator factories; {@code metrics[i].getName()} is used in error/log messages for
 *        {@code aggs[i]}
 * @param aggs aggregators to iterate over
 * @param rowContainer not referenced by any line visible in this listing
 * @param row not referenced by any line visible in this listing
 * @param reportParseExceptions when true a caught {@link ParseException} is rethrown wrapped with the
 *        aggregator name; when false it is logged at debug level
 */
private void doAggregate(AggregatorFactory[] metrics, Aggregator[] aggs, ThreadLocal<InputRow> rowContainer, InputRow row, boolean reportParseExceptions) {
for (int i = 0; i < aggs.length; i++) {
final Aggregator agg = aggs[i];
// Lock on the individual aggregator rather than on the whole index.
synchronized (agg) {
try {
} catch (ParseException e) {
// "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
if (reportParseExceptions) {
throw new ParseException(e, "Encountered parse error for aggregator[%s]", metrics[i].getName());
} else {
log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName());
/**
 * Wraps {@code row} in an {@link InputRow} view whose dimension list excludes spatial dimensions and
 * whose dimension lookups can be overridden by values collected in a local {@code spatialLookup} map.
 *
 * <p>The returned row delegates timestamps, metrics and {@code toString} to {@code row}. For
 * {@code getDimension} / {@code getRaw} it first consults {@code spatialLookup} and falls back to
 * {@code row} when the map has no entry. After the wrapper is built, {@code spatialLookup} is filled
 * from {@code spatialDimensionMap}.
 *
 * <p>NOTE(review): this listing has no closing braces and appears to omit some statements, so the exact
 * nesting below cannot be confirmed from here.
 *
 * @param row the incoming row to wrap
 * @return the wrapping row
 */
public InputRow apply(final InputRow row) {
// Filled in by the loop near the end of this method; captured by the anonymous InputRow below.
final Map<String, List<String>> spatialLookup = Maps.newHashMap();
// remove all spatial dimensions
final List<String> finalDims = Lists.newArrayList(Iterables.filter(row.getDimensions(), new Predicate<String>() {
public boolean apply(String input) {
// Keep only names that are neither a spatial dimension nor one of the partial dimension names.
return !spatialDimensionMap.containsKey(input) && !spatialPartialDimNames.contains(input);
InputRow retVal = new InputRow() {
public List<String> getDimensions() {
return finalDims;
public long getTimestampFromEpoch() {
return row.getTimestampFromEpoch();
public DateTime getTimestamp() {
return row.getTimestamp();
public List<String> getDimension(String dimension) {
// Prefer the spatial override when one was recorded for this dimension.
List<String> retVal = spatialLookup.get(dimension);
return (retVal == null) ? row.getDimension(dimension) : retVal;
public Object getRaw(String dimension) {
// Same override-then-delegate rule as getDimension.
List<String> retVal = spatialLookup.get(dimension);
return (retVal == null) ? row.getRaw(dimension) : retVal;
public long getLongMetric(String metric) {
try {
return row.getLongMetric(metric);
} catch (ParseException e) {
throw Throwables.propagate(e);
public float getFloatMetric(String metric) {
try {
return row.getFloatMetric(metric);
} catch (ParseException e) {
throw Throwables.propagate(e);
public String toString() {
return row.toString();
public int compareTo(Row o) {
// Ordering is by timestamp only.
return getTimestamp().compareTo(o.getTimestamp());
// Populate spatialLookup, one entry per configured spatial dimension.
for (Map.Entry<String, SpatialDimensionSchema> entry : spatialDimensionMap.entrySet()) {
final String spatialDimName = entry.getKey();
final SpatialDimensionSchema spatialDim = entry.getValue();
List<String> dimVals = row.getDimension(spatialDimName);
if (dimVals != null && !dimVals.isEmpty()) {
// The row already carries a value under the spatial dimension's own name; it must be a single element.
if (dimVals.size() != 1) {
throw new ISE("Spatial dimension value must be in an array!");
if (isJoinedSpatialDimValValid(dimVals.get(0))) {
spatialLookup.put(spatialDimName, dimVals);
} else {
// Otherwise look at the schema's component dimensions on the row.
List<String> spatialDimVals = Lists.newArrayList();
for (String dim : spatialDim.getDims()) {
List<String> partialDimVals = row.getDimension(dim);
// NOTE(review): the body of this check is empty in this listing; nothing visible adds to
// spatialDimVals, so a statement is presumably missing here — confirm against the full file.
if (isSpatialDimValsValid(partialDimVals)) {
// Only record the joined value when one value was gathered per component dimension.
if (spatialDimVals.size() == spatialDim.getDims().size()) {
spatialLookup.put(spatialDimName, Arrays.asList(JOINER.join(spatialDimVals)));
return retVal;
/**
 * Serialises {@code row} into a byte array: the dimension count followed by each dimension name and its
 * values, then the aggregator count followed by each aggregator's name and its value.
 *
 * <p>For every factory in {@code aggs} a fresh {@link Aggregator} is created over a supplier that always
 * returns {@code row}. The value written depends on {@code aggFactory.getTypeName()}: {@code "long"} is
 * written with {@code WritableUtils.writeVLong}; any type other than {@code "float"} / {@code "long"} is
 * treated as a complex metric and written via its {@link ComplexMetricSerde}.
 *
 * <p>NOTE(review): this listing has no closing braces and appears to omit some statements (nothing
 * follows the "write timestamp" comment, the inner {@code try} body is empty, and the {@code "float"}
 * branch is empty), so those steps cannot be documented from here.
 *
 * @param row the row to serialise
 * @param aggs aggregator factories whose values are computed and written
 * @param reportParseExceptions when true a {@link ParseException} caught for an aggregator is rethrown
 *        wrapped with the aggregator name
 * @return the bytes accumulated in the output buffer
 */
public static final byte[] toBytes(final InputRow row, AggregatorFactory[] aggs, boolean reportParseExceptions) {
try {
ByteArrayDataOutput out = ByteStreams.newDataOutput();
//write timestamp
//writing all dimensions
List<String> dimList = row.getDimensions();
// NOTE(review): dimList is dereferenced on the next line before the null check that follows it, so a
// null dimension list would raise NullPointerException and the check can never be false when reached.
WritableUtils.writeVInt(out, dimList.size());
if (dimList != null) {
for (String dim : dimList) {
List<String> dimValues = row.getDimension(dim);
writeString(dim, out);
writeStringArray(dimValues, out);
//writing all metrics
// Every column selector created below reads from this same row.
Supplier<InputRow> supplier = new Supplier<InputRow>() {
public InputRow get() {
return row;
WritableUtils.writeVInt(out, aggs.length);
for (AggregatorFactory aggFactory : aggs) {
String k = aggFactory.getName();
writeString(k, out);
Aggregator agg = aggFactory.factorize(IncrementalIndex.makeColumnSelectorFactory(VirtualColumns.EMPTY, aggFactory, supplier, true));
try {
} catch (ParseException e) {
// "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
if (reportParseExceptions) {
throw new ParseException(e, "Encountered parse error for aggregator[%s]", k);
log.debug(e, "Encountered parse error, skipping aggregator[%s].", k);
// Pick the on-wire encoding from the factory's declared type name.
String t = aggFactory.getTypeName();
if (t.equals("float")) {
} else if (t.equals("long")) {
WritableUtils.writeVLong(out, agg.getLong());
} else {
//its a complex metric
Object val = agg.get();
ComplexMetricSerde serde = getComplexMetricSerde(t);
writeBytes(serde.toBytes(val), out);
return out.toByteArray();
} catch (IOException ex) {
// Surface I/O failures through Throwables.propagate instead of a checked exception.
throw Throwables.propagate(ex);
/**
 * Reads rows from a firehose, routes each row to a shard spec for its interval, adds it to an
 * appenderator driver under a per-shard sequence name, and finally publishes the resulting segments.
 *
 * <p>Segment allocation uses an {@code ActionBasedSegmentAllocator} when the IO config asks to append to
 * existing data; otherwise an inline allocator builds a {@code SegmentIdentifier} from the row's bucket
 * interval, {@code version}, and the shard spec previously recorded for the sequence name.
 *
 * <p>NOTE(review): this listing has no closing braces and appears to omit some statements, so the exact
 * nesting below (and whether the method continues past the last visible line) cannot be confirmed here.
 *
 * @param toolbox source of the monitor scheduler and the task action client
 * @param dataSchema supplies the granularity spec and the parser handed to the firehose
 * @param shardSpecs shard specs available per interval
 * @param version version string used in sequence names and segment identifiers
 * @param firehoseFactory creates the firehose that rows are read from
 * @return {@code false} when {@code driver.finish} yields {@code null}; {@code true} on the last
 *         visible line
 * @throws IOException declared by the signature
 * @throws InterruptedException declared by the signature
 */
private boolean generateAndPublishSegments(final TaskToolbox toolbox, final DataSchema dataSchema, final Map<Interval, List<ShardSpec>> shardSpecs, final String version, final FirehoseFactory firehoseFactory) throws IOException, InterruptedException {
final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
// This FireDepartment is built with a null-filled RealtimeIOConfig; the visible code uses it only for metrics.
final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null);
final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
// sequence name -> shard spec to publish with; written in the row loop, read by the inline allocator.
final Map<String, ShardSpec> sequenceNameToShardSpecMap = Maps.newHashMap();
if (toolbox.getMonitorScheduler() != null) {
toolbox.getMonitorScheduler().addMonitor(new RealtimeMetricsMonitor(ImmutableList.of(fireDepartmentForMetrics), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })));
final SegmentAllocator segmentAllocator;
if (ingestionSchema.getIOConfig().isAppendToExisting()) {
segmentAllocator = new ActionBasedSegmentAllocator(toolbox.getTaskActionClient(), dataSchema);
} else {
segmentAllocator = new SegmentAllocator() {
public SegmentIdentifier allocate(DateTime timestamp, String sequenceName, String previousSegmentId) throws IOException {
Optional<Interval> interval = granularitySpec.bucketInterval(timestamp);
if (!interval.isPresent()) {
throw new ISE("Could not find interval for timestamp [%s]", timestamp);
ShardSpec shardSpec = sequenceNameToShardSpecMap.get(sequenceName);
if (shardSpec == null) {
throw new ISE("Could not find ShardSpec for sequenceName [%s]", sequenceName);
return new SegmentIdentifier(getDataSource(), interval.get(), version, shardSpec);
try (final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, toolbox, dataSchema);
final FiniteAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator, fireDepartmentMetrics);
final Firehose firehose = firehoseFactory.connect(dataSchema.getParser())) {
final Supplier<Committer> committerSupplier = Committers.supplierFromFirehose(firehose);
// Per-interval cache of shard spec lookups, built on first use.
final Map<Interval, ShardSpecLookup> shardSpecLookups = Maps.newHashMap();
if (driver.startJob() != null) {
try {
while (firehose.hasMore()) {
try {
final InputRow inputRow = firehose.nextRow();
final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
// NOTE(review): the body of this absent-interval check is empty in this listing and the next line
// calls optInterval.get(); presumably the missing body skips the row — confirm against the full file.
if (!optInterval.isPresent()) {
final Interval interval = optInterval.get();
if (!shardSpecLookups.containsKey(interval)) {
final List<ShardSpec> intervalShardSpecs = shardSpecs.get(interval);
if (intervalShardSpecs == null || intervalShardSpecs.isEmpty()) {
throw new ISE("Failed to get shardSpec for interval[%s]", interval);
shardSpecLookups.put(interval, intervalShardSpecs.get(0).getLookup(intervalShardSpecs));
final ShardSpec shardSpec = shardSpecLookups.get(interval).getShardSpec(inputRow.getTimestampFromEpoch(), inputRow);
// The sequence name is derived from interval, version and partition number.
final String sequenceName = String.format("index_%s_%s_%d", interval, version, shardSpec.getPartitionNum());
if (!sequenceNameToShardSpecMap.containsKey(sequenceName)) {
// When extendable shard specs are forced or the task appends to existing data, publish with a
// NumberedShardSpec built from the partition number and the interval's shard count.
final ShardSpec shardSpecForPublishing = ingestionSchema.getTuningConfig().isForceExtendableShardSpecs() || ingestionSchema.getIOConfig().isAppendToExisting() ? new NumberedShardSpec(shardSpec.getPartitionNum(), shardSpecs.get(interval).size()) : shardSpec;
sequenceNameToShardSpecMap.put(sequenceName, shardSpecForPublishing);
final SegmentIdentifier identifier = driver.add(inputRow, sequenceName, committerSupplier);
if (identifier == null) {
throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
} catch (ParseException e) {
if (ingestionSchema.getTuningConfig().isReportParseExceptions()) {
throw e;
// NOTE(review): the else branch and the finally body are empty in this listing.
} else {
} finally {
final TransactionalSegmentPublisher publisher = new TransactionalSegmentPublisher() {
public boolean publishSegments(Set<DataSegment> segments, Object commitMetadata) throws IOException {
// Insert the segments through the task action client, passing null for both remaining arguments.
final SegmentTransactionalInsertAction action = new SegmentTransactionalInsertAction(segments, null, null);
return toolbox.getTaskActionClient().submit(action).isSuccess();
final SegmentsAndMetadata published = driver.finish(publisher, committerSupplier.get());
if (published == null) {
log.error("Failed to publish segments, aborting!");
return false;
// NOTE(review): the next line looks truncated in this listing (the call that receives the format
// string and arguments is not visible) — confirm against the full file.
} else {"Published segments[%s]", Joiner.on(", ").join(Iterables.transform(published.getSegments(), new Function<DataSegment, String>() {
public String apply(DataSegment input) {
return input.getIdentifier();
return true;