Example 1 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class StringArrayWritable, the method fromBytes:

public static final InputRow fromBytes(byte[] data, AggregatorFactory[] aggs) {
    try {
        DataInput in = ByteStreams.newDataInput(data);
        // Read timestamp
        long timestamp = in.readLong();
        Map<String, Object> event = Maps.newHashMap();
        // Read dimensions
        List<String> dimensions = Lists.newArrayList();
        int dimNum = WritableUtils.readVInt(in);
        for (int i = 0; i < dimNum; i++) {
            String dimension = readString(in);
            dimensions.add(dimension);
            List<String> dimensionValues = readStringArray(in);
            if (dimensionValues == null) {
                continue;
            }
            if (dimensionValues.size() == 1) {
                event.put(dimension, dimensionValues.get(0));
            } else {
                event.put(dimension, dimensionValues);
            }
        }
        // Read metrics
        int metricSize = WritableUtils.readVInt(in);
        for (int i = 0; i < metricSize; i++) {
            String metric = readString(in);
            String type = getType(metric, aggs, i);
            if (type.equals("float")) {
                event.put(metric, in.readFloat());
            } else if (type.equals("long")) {
                event.put(metric, WritableUtils.readVLong(in));
            } else {
                ComplexMetricSerde serde = getComplexMetricSerde(type);
                byte[] value = readBytes(in);
                event.put(metric, serde.fromBytes(value, 0, value.length));
            }
        }
        return new MapBasedInputRow(timestamp, dimensions, event);
    } catch (IOException ex) {
        throw Throwables.propagate(ex);
    }
}
Also used : DataInput(java.io.DataInput) ComplexMetricSerde(io.druid.segment.serde.ComplexMetricSerde) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) IOException(java.io.IOException)
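
The helper calls in fromBytes (readString, readStringArray, readBytes, getType, getComplexMetricSerde) live elsewhere in the same class and are not shown. A minimal sketch of the three read helpers, assuming each value is stored as a VInt length prefix followed by raw bytes (this layout is an assumption; the snippet above does not show the writer side):

private static byte[] readBytes(DataInput in) throws IOException {
    // Assumed layout: VInt length prefix, then the value's raw bytes.
    int size = WritableUtils.readVInt(in);
    byte[] result = new byte[size];
    in.readFully(result, 0, size);
    return result;
}

private static String readString(DataInput in) throws IOException {
    // Strings are assumed to be length-prefixed UTF-8.
    return new String(readBytes(in), java.nio.charset.StandardCharsets.UTF_8);
}

private static List<String> readStringArray(DataInput in) throws IOException {
    int count = WritableUtils.readVInt(in);
    if (count == 0) {
        // A zero count maps to null, matching the null check in fromBytes.
        return null;
    }
    List<String> values = Lists.newArrayListWithExpectedSize(count);
    for (int i = 0; i < count; i++) {
        values.add(readString(in));
    }
    return values;
}

Under that layout, fromBytes is the exact inverse of a writer that emits the timestamp, dimensions, and metrics in the same order.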

Example 2 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class RealtimeIndexTaskTest, the method testRestore:

@Test(timeout = 60_000L)
public void testRestore() throws Exception {
    final File directory = tempFolder.newFolder();
    final RealtimeIndexTask task1 = makeRealtimeTask(null);
    final DataSegment publishedSegment;
    // First run:
    {
        final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
        final TaskToolbox taskToolbox = makeToolbox(task1, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task1.getFirehose() == null) {
            Thread.sleep(50);
        }
        final TestFirehose firehose = (TestFirehose) task1.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))));
        // Trigger graceful shutdown.
        task1.stopGracefully();
        // Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
        // Nothing should be published.
        Assert.assertEquals(Sets.newHashSet(), mdc.getPublished());
    }
    // Second run:
    {
        final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
        final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
        final TaskToolbox taskToolbox = makeToolbox(task2, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task2.getFirehose() == null) {
            Thread.sleep(50);
        }
        // Do a query, at this point the previous data should be loaded.
        Assert.assertEquals(1, sumMetric(task2, "rows"));
        final TestFirehose firehose = (TestFirehose) task2.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim2"), ImmutableMap.<String, Object>of("dim2", "bar"))));
        // Stop the firehose, this will drain out existing events.
        firehose.close();
        // Wait for publish.
        while (mdc.getPublished().isEmpty()) {
            Thread.sleep(50);
        }
        publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
        // Do a query.
        Assert.assertEquals(2, sumMetric(task2, "rows"));
        // Simulate handoff.
        for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
            final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
            Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
            executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
        }
        handOffCallbacks.clear();
        // Wait for the task to finish.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
    }
}
Also used : TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) TaskToolbox(io.druid.indexing.common.TaskToolbox) Executor(java.util.concurrent.Executor) TestIndexerMetadataStorageCoordinator(io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) SegmentDescriptor(io.druid.query.SegmentDescriptor) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) File(java.io.File) Pair(io.druid.java.util.common.Pair) Test(org.junit.Test)
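
Both runs feed the task plain MapBasedInputRow instances through TestFirehose. For quick reference, a minimal, self-contained sketch of how such a row is built and what its accessors return (constructor and method names per io.druid.data.input.MapBasedInputRow; the demo class itself is hypothetical):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;

public class MapBasedInputRowDemo {
    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        // Dimension names are declared up front; values come from the event map.
        InputRow row = new MapBasedInputRow(
            now,
            ImmutableList.of("dim1"),
            ImmutableMap.<String, Object>of("dim1", "foo"));
        System.out.println(row.getTimestampFromEpoch()); // now
        System.out.println(row.getDimension("dim1"));    // [foo]
        System.out.println(row.getRaw("dim1"));          // foo
    }
}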

Example 3 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class GroupByQueryHelper, the method createIndexAccumulatorPair:

public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, final GroupByQueryConfig config, StupidPool<ByteBuffer> bufferPool, final boolean combine) {
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final Granularity gran = query.getGranularity();
    final long timeStart = query.getIntervals().get(0).getStartMillis();
    long granTimeStart = timeStart;
    if (!(Granularities.ALL.equals(gran))) {
        granTimeStart = gran.bucketStart(new DateTime(timeStart)).getMillis();
    }
    final List<AggregatorFactory> aggs;
    if (combine) {
        aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {

            @Override
            public AggregatorFactory apply(AggregatorFactory input) {
                return input.getCombiningFactory();
            }
        });
    } else {
        aggs = query.getAggregatorSpecs();
    }
    final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec input) {
            return input.getOutputName();
        }
    });
    final IncrementalIndex index;
    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> dimensionSchemas = Lists.newArrayList();
    for (DimensionSpec dimension : query.getDimensions()) {
        dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
    }
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder().withDimensionsSpec(new DimensionsSpec(dimensionSchemas, null, null)).withMetrics(aggs.toArray(new AggregatorFactory[aggs.size()])).withQueryGranularity(gran).withMinTimestamp(granTimeStart).build();
    if (query.getContextValue("useOffheap", false)) {
        index = new OffheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults(), bufferPool);
    } else {
        index = new OnheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults());
    }
    Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {

        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            if (in instanceof MapBasedRow) {
                try {
                    MapBasedRow row = (MapBasedRow) in;
                    accumulated.add(new MapBasedInputRow(row.getTimestamp(), dimensions, row.getEvent()));
                } catch (IndexSizeExceededException e) {
                    throw new ResourceLimitExceededException(e.getMessage());
                }
            } else {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }
            return accumulated;
        }
    };
    return new Pair<>(index, accumulator);
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) DimensionSpec(io.druid.query.dimension.DimensionSpec) OffheapIncrementalIndex(io.druid.segment.incremental.OffheapIncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) Granularity(io.druid.java.util.common.granularity.Granularity) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DimensionSchema(io.druid.data.input.impl.DimensionSchema) DateTime(org.joda.time.DateTime) MapBasedRow(io.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) ISE(io.druid.java.util.common.ISE) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema) Pair(io.druid.java.util.common.Pair) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) ResourceLimitExceededException(io.druid.query.ResourceLimitExceededException) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException)
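
A sketch of how the returned pair might be consumed. The variable names and the runNestedQuery source of rows are illustrative assumptions; in Druid the groupBy query toolchest drives this in practice:

// Assumption: 'results' is a Sequence<Row> produced by running the underlying query.
Sequence<Row> results = runNestedQuery(); // hypothetical source of rows
Pair<IncrementalIndex, Accumulator<IncrementalIndex, Row>> indexAccumulatorPair =
    GroupByQueryHelper.createIndexAccumulatorPair(query, config, bufferPool, false);
// Fold every MapBasedRow into the incremental index; any other Row type throws ISE.
IncrementalIndex index = results.accumulate(indexAccumulatorPair.lhs, indexAccumulatorPair.rhs);
try {
    // ... read grouped rows back out of the index ...
} finally {
    index.close(); // release the index's on/off-heap buffers
}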

Example 4 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class DataSourceMetadataQueryTest, the method testMaxIngestedEventTime:

@Test
public void testMaxIngestedEventTime() throws Exception {
    final IncrementalIndex rtIndex = new OnheapIncrementalIndex(0L, Granularities.NONE, new AggregatorFactory[] { new CountAggregatorFactory("count") }, 1000);
    final QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner((QueryRunnerFactory) new DataSourceMetadataQueryRunnerFactory(QueryRunnerTestHelper.NOOP_QUERYWATCHER), new IncrementalIndexSegment(rtIndex, "test"), null);
    DateTime timestamp = new DateTime(System.currentTimeMillis());
    rtIndex.add(new MapBasedInputRow(timestamp.getMillis(), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "x")));
    DataSourceMetadataQuery dataSourceMetadataQuery = Druids.newDataSourceMetadataQueryBuilder().dataSource("testing").build();
    Map<String, Object> context = new MapMaker().makeMap();
    context.put(Result.MISSING_SEGMENTS_KEY, Lists.newArrayList());
    Iterable<Result<DataSourceMetadataResultValue>> results = Sequences.toList(runner.run(dataSourceMetadataQuery, context), Lists.<Result<DataSourceMetadataResultValue>>newArrayList());
    DataSourceMetadataResultValue val = results.iterator().next().getValue();
    DateTime maxIngestedEventTime = val.getMaxIngestedEventTime();
    Assert.assertEquals(timestamp, maxIngestedEventTime);
}
Also used : IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) MapMaker(com.google.common.collect.MapMaker) QueryRunner(io.druid.query.QueryRunner) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Test(org.junit.Test)
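
The query surfaces state the incremental index already tracks: each successful add updates the index's max ingested event time. A stripped-down sketch of that relationship (getMaxIngestedEventTime is the IncrementalIndex accessor; the demo method is hypothetical):

static void demoMaxIngestedEventTime() throws Exception {
    IncrementalIndex index = new OnheapIncrementalIndex(
        0L, Granularities.NONE,
        new AggregatorFactory[]{ new CountAggregatorFactory("count") }, 1000);
    DateTime ts = new DateTime();
    index.add(new MapBasedInputRow(
        ts.getMillis(), ImmutableList.of("dim1"),
        ImmutableMap.<String, Object>of("dim1", "x")));
    // The index remembers the newest event timestamp it has ingested;
    // the DataSourceMetadata query reports exactly this value per segment.
    DateTime max = index.getMaxIngestedEventTime(); // equals ts
}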

Example 5 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class TimeseriesQueryRunnerBonusTest, the method testOneRowAtATime:

@Test
public void testOneRowAtATime() throws Exception {
    final IncrementalIndex oneRowIndex = new OnheapIncrementalIndex(new DateTime("2012-01-01T00:00:00Z").getMillis(), Granularities.NONE, new AggregatorFactory[] {}, 1000);
    List<Result<TimeseriesResultValue>> results;
    oneRowIndex.add(new MapBasedInputRow(new DateTime("2012-01-01T00:00:00Z").getMillis(), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "x")));
    results = runTimeseriesCount(oneRowIndex);
    Assert.assertEquals("index size", 1, oneRowIndex.size());
    Assert.assertEquals("result size", 1, results.size());
    Assert.assertEquals("result timestamp", new DateTime("2012-01-01T00:00:00Z"), results.get(0).getTimestamp());
    Assert.assertEquals("result count metric", 1, (long) results.get(0).getValue().getLongMetric("rows"));
    oneRowIndex.add(new MapBasedInputRow(new DateTime("2012-01-01T00:00:00Z").getMillis(), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "y")));
    results = runTimeseriesCount(oneRowIndex);
    Assert.assertEquals("index size", 2, oneRowIndex.size());
    Assert.assertEquals("result size", 1, results.size());
    Assert.assertEquals("result timestamp", new DateTime("2012-01-01T00:00:00Z"), results.get(0).getTimestamp());
    Assert.assertEquals("result count metric", 2, (long) results.get(0).getValue().getLongMetric("rows"));
}
Also used : IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) Test(org.junit.Test)
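
The runTimeseriesCount helper is not shown above. A plausible reconstruction, assuming it issues a count-only timeseries query over the index; the runner construction mirrors Example 4, and the actual helper in TimeseriesQueryRunnerBonusTest may differ in detail:

private static List<Result<TimeseriesResultValue>> runTimeseriesCount(IncrementalIndex index) {
    final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(
        new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()),
        new TimeseriesQueryEngine(),
        QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    final QueryRunner<Result<TimeseriesResultValue>> runner = QueryRunnerTestHelper.makeQueryRunner(
        (QueryRunnerFactory) factory, new IncrementalIndexSegment(index, "test"), null);
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
        .dataSource("test")
        .granularity(Granularities.ALL)
        .intervals(ImmutableList.of(new Interval("2012-01-01/P1D")))
        .aggregators(ImmutableList.<AggregatorFactory>of(new CountAggregatorFactory("rows")))
        .build();
    // Count every row in the index within the interval; one result bucket under ALL granularity.
    return Sequences.toList(
        runner.run(query, Maps.<String, Object>newHashMap()),
        Lists.<Result<TimeseriesResultValue>>newArrayList());
}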

Aggregations

MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 73
Test (org.junit.Test): 51
DateTime (org.joda.time.DateTime): 38
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 32
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 30
File (java.io.File): 19
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 13
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 12
InputRow (io.druid.data.input.InputRow): 11
IncrementalIndexTest (io.druid.segment.data.IncrementalIndexTest): 11
Interval (org.joda.time.Interval): 11
IOException (java.io.IOException): 10
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 9
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 8
Row (io.druid.data.input.Row): 7
TaskStatus (io.druid.indexing.common.TaskStatus): 7
TaskToolbox (io.druid.indexing.common.TaskToolbox): 7
TestIndexerMetadataStorageCoordinator (io.druid.indexing.test.TestIndexerMetadataStorageCoordinator): 7
SpatialDimensionSchema (io.druid.data.input.impl.SpatialDimensionSchema): 6
Pair (io.druid.java.util.common.Pair): 6