Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class StringArrayWritable, the method fromBytes:
public static final InputRow fromBytes(byte[] data, AggregatorFactory[] aggs) {
  try {
    DataInput in = ByteStreams.newDataInput(data);
    // Read timestamp
    long timestamp = in.readLong();
    Map<String, Object> event = Maps.newHashMap();
    // Read dimensions
    List<String> dimensions = Lists.newArrayList();
    int dimNum = WritableUtils.readVInt(in);
    for (int i = 0; i < dimNum; i++) {
      String dimension = readString(in);
      dimensions.add(dimension);
      List<String> dimensionValues = readStringArray(in);
      if (dimensionValues == null) {
        continue;
      }
      if (dimensionValues.size() == 1) {
        event.put(dimension, dimensionValues.get(0));
      } else {
        event.put(dimension, dimensionValues);
      }
    }
    // Read metrics
    int metricSize = WritableUtils.readVInt(in);
    for (int i = 0; i < metricSize; i++) {
      String metric = readString(in);
      String type = getType(metric, aggs, i);
      if (type.equals("float")) {
        event.put(metric, in.readFloat());
      } else if (type.equals("long")) {
        event.put(metric, WritableUtils.readVLong(in));
      } else {
        ComplexMetricSerde serde = getComplexMetricSerde(type);
        byte[] value = readBytes(in);
        event.put(metric, serde.fromBytes(value, 0, value.length));
      }
    }
    return new MapBasedInputRow(timestamp, dimensions, event);
  } catch (IOException ex) {
    throw Throwables.propagate(ex);
  }
}
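The method reads back a compact wire format: a long timestamp, a variable-length count of dimensions (each a name plus a string array of values), then a variable-length count of metrics whose encoding depends on the aggregator type. A minimal usage sketch, assuming serializedBytes was produced by the matching writer (the byte array and the aggregator chosen here are hypothetical):

// Hypothetical usage sketch: serializedBytes must come from the matching serializer.
AggregatorFactory[] aggs = new AggregatorFactory[] { new CountAggregatorFactory("count") };
InputRow row = StringArrayWritable.fromBytes(serializedBytes, aggs);
long ts = row.getTimestampFromEpoch();   // the long written first
List<String> dims = row.getDimensions(); // dimension names, in write order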
Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class RealtimeIndexTaskTest, the method testRestore:
@Test(timeout = 60_000L)
public void testRestore() throws Exception {
  final File directory = tempFolder.newFolder();
  final RealtimeIndexTask task1 = makeRealtimeTask(null);
  final DataSegment publishedSegment;
  // First run:
  {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final TaskToolbox taskToolbox = makeToolbox(task1, mdc, directory);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
    // Wait for the firehose to show up; it starts off null.
    while (task1.getFirehose() == null) {
      Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task1.getFirehose();
    firehose.addRows(
        ImmutableList.<InputRow>of(
            new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))
        )
    );
    // Trigger graceful shutdown.
    task1.stopGracefully();
    // Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
    // Nothing should be published.
    Assert.assertEquals(Sets.newHashSet(), mdc.getPublished());
  }
  // Second run:
  {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
    final TaskToolbox taskToolbox = makeToolbox(task2, mdc, directory);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
    // Wait for the firehose to show up; it starts off null.
    while (task2.getFirehose() == null) {
      Thread.sleep(50);
    }
    // Do a query; at this point the previous data should be loaded.
    Assert.assertEquals(1, sumMetric(task2, "rows"));
    final TestFirehose firehose = (TestFirehose) task2.getFirehose();
    firehose.addRows(
        ImmutableList.<InputRow>of(
            new MapBasedInputRow(now, ImmutableList.of("dim2"), ImmutableMap.<String, Object>of("dim2", "bar"))
        )
    );
    // Stop the firehose; this will drain out existing events.
    firehose.close();
    // Wait for publish.
    while (mdc.getPublished().isEmpty()) {
      Thread.sleep(50);
    }
    publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
    // Do a query.
    Assert.assertEquals(2, sumMetric(task2, "rows"));
    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
      final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
      Assert.assertEquals(
          new SegmentDescriptor(
              publishedSegment.getInterval(),
              publishedSegment.getVersion(),
              publishedSegment.getShardSpec().getPartitionNum()
          ),
          entry.getKey()
      );
      executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
  }
}
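Both MapBasedInputRow constructor overloads appear in this listing: the test above passes a DateTime (the test's `now` field), while later snippets pass epoch millis. A standalone sketch with illustrative values:

// Standalone sketch: the two MapBasedInputRow constructor overloads (values are illustrative).
DateTime rowTime = new DateTime("2016-01-01T00:00:00Z");
InputRow byDateTime = new MapBasedInputRow(rowTime, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"));
InputRow byMillis = new MapBasedInputRow(rowTime.getMillis(), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"));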
Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class GroupByQueryHelper, the method createIndexAccumulatorPair:
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(
    final GroupByQuery query,
    final GroupByQueryConfig config,
    StupidPool<ByteBuffer> bufferPool,
    final boolean combine
) {
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final Granularity gran = query.getGranularity();
  final long timeStart = query.getIntervals().get(0).getStartMillis();
  long granTimeStart = timeStart;
  if (!(Granularities.ALL.equals(gran))) {
    granTimeStart = gran.bucketStart(new DateTime(timeStart)).getMillis();
  }
  final List<AggregatorFactory> aggs;
  if (combine) {
    aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {
      @Override
      public AggregatorFactory apply(AggregatorFactory input) {
        return input.getCombiningFactory();
      }
    });
  } else {
    aggs = query.getAggregatorSpecs();
  }
  final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {
    @Override
    public String apply(DimensionSpec input) {
      return input.getOutputName();
    }
  });
  final IncrementalIndex index;
  final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
  // All groupBy dimensions are strings, for now.
  final List<DimensionSchema> dimensionSchemas = Lists.newArrayList();
  for (DimensionSpec dimension : query.getDimensions()) {
    dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
  }
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(new DimensionsSpec(dimensionSchemas, null, null))
      .withMetrics(aggs.toArray(new AggregatorFactory[aggs.size()]))
      .withQueryGranularity(gran)
      .withMinTimestamp(granTimeStart)
      .build();
  if (query.getContextValue("useOffheap", false)) {
    index = new OffheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults(), bufferPool);
  } else {
    index = new OnheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults());
  }
  Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {
    @Override
    public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
      if (in instanceof MapBasedRow) {
        try {
          MapBasedRow row = (MapBasedRow) in;
          accumulated.add(new MapBasedInputRow(row.getTimestamp(), dimensions, row.getEvent()));
        } catch (IndexSizeExceededException e) {
          throw new ResourceLimitExceededException(e.getMessage());
        }
      } else {
        throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
      }
      return accumulated;
    }
  };
  return new Pair<>(index, accumulator);
}
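A hedged usage sketch of the returned pair: fold a sequence of MapBasedRow results into the fresh index. Here query, config, bufferPool, and rows are placeholders assumed to exist in the caller:

// Hypothetical usage: accumulate a Sequence<Row> of MapBasedRow results into the index.
Pair<IncrementalIndex, Accumulator<IncrementalIndex, Row>> indexAccumulatorPair =
    GroupByQueryHelper.createIndexAccumulatorPair(query, config, bufferPool, false);
IncrementalIndex index = rows.accumulate(indexAccumulatorPair.lhs, indexAccumulatorPair.rhs);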
Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class DataSourceMetadataQueryTest, the method testMaxIngestedEventTime:
@Test
public void testMaxIngestedEventTime() throws Exception {
  final IncrementalIndex rtIndex = new OnheapIncrementalIndex(0L, Granularities.NONE, new AggregatorFactory[] { new CountAggregatorFactory("count") }, 1000);
  final QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner(
      (QueryRunnerFactory) new DataSourceMetadataQueryRunnerFactory(QueryRunnerTestHelper.NOOP_QUERYWATCHER),
      new IncrementalIndexSegment(rtIndex, "test"),
      null
  );
  DateTime timestamp = new DateTime(System.currentTimeMillis());
  rtIndex.add(new MapBasedInputRow(timestamp.getMillis(), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "x")));
  DataSourceMetadataQuery dataSourceMetadataQuery = Druids.newDataSourceMetadataQueryBuilder().dataSource("testing").build();
  Map<String, Object> context = new MapMaker().makeMap();
  context.put(Result.MISSING_SEGMENTS_KEY, Lists.newArrayList());
  Iterable<Result<DataSourceMetadataResultValue>> results = Sequences.toList(
      runner.run(dataSourceMetadataQuery, context),
      Lists.<Result<DataSourceMetadataResultValue>>newArrayList()
  );
  DataSourceMetadataResultValue val = results.iterator().next().getValue();
  DateTime maxIngestedEventTime = val.getMaxIngestedEventTime();
  Assert.assertEquals(timestamp, maxIngestedEventTime);
}
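As a hedged illustration (not part of the test), maxIngestedEventTime appears to track the latest event timestamp the index has ingested, so adding an older row afterwards should not move it:

// Illustrative sketch: an older row should not advance maxIngestedEventTime.
rtIndex.add(new MapBasedInputRow(timestamp.minusHours(1).getMillis(), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "y")));
// Re-running the same query would still be expected to report `timestamp` as the max.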
Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class TimeseriesQueryRunnerBonusTest, the method testOneRowAtATime:
@Test
public void testOneRowAtATime() throws Exception {
  final IncrementalIndex oneRowIndex = new OnheapIncrementalIndex(new DateTime("2012-01-01T00:00:00Z").getMillis(), Granularities.NONE, new AggregatorFactory[] {}, 1000);
  List<Result<TimeseriesResultValue>> results;
  oneRowIndex.add(new MapBasedInputRow(new DateTime("2012-01-01T00:00:00Z").getMillis(), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "x")));
  results = runTimeseriesCount(oneRowIndex);
  Assert.assertEquals("index size", 1, oneRowIndex.size());
  Assert.assertEquals("result size", 1, results.size());
  Assert.assertEquals("result timestamp", new DateTime("2012-01-01T00:00:00Z"), results.get(0).getTimestamp());
  Assert.assertEquals("result count metric", 1, (long) results.get(0).getValue().getLongMetric("rows"));
  oneRowIndex.add(new MapBasedInputRow(new DateTime("2012-01-01T00:00:00Z").getMillis(), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "y")));
  results = runTimeseriesCount(oneRowIndex);
  Assert.assertEquals("index size", 2, oneRowIndex.size());
  Assert.assertEquals("result size", 1, results.size());
  Assert.assertEquals("result timestamp", new DateTime("2012-01-01T00:00:00Z"), results.get(0).getTimestamp());
  Assert.assertEquals("result count metric", 2, (long) results.get(0).getValue().getLongMetric("rows"));
}
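The helper runTimeseriesCount is not shown in this listing; a hedged sketch of the kind of query such a helper plausibly builds (the query shape is an assumption; only the "rows" output name comes from the assertions above):

// Hypothetical sketch of a runTimeseriesCount-style query; the real helper is not shown here.
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
    .dataSource("test")
    .granularity(Granularities.ALL)
    .intervals(ImmutableList.of(new Interval("2012-01-01/P1D")))
    .aggregators(ImmutableList.<AggregatorFactory>of(new CountAggregatorFactory("rows")))
    .build();
// Run against the index, such a query should yield a single result whose "rows" metric
// equals the number of ingested rows.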