Search in sources :
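
Every example below builds rows through the same three-argument constructor: an event timestamp in milliseconds, the list of dimension names, and the raw event map. As a minimal orientation sketch (the dimension and metric names here are illustrative, not taken from any one test):

    InputRow row = new MapBasedInputRow(
        System.currentTimeMillis(),
        ImmutableList.of("host"),
        ImmutableMap.<String, Object>of("host", "host1", "visited", 10)
    );
    row.getTimestampFromEpoch();  // the timestamp passed in
    row.getDimension("host");     // ["host1"]
    row.getLongMetric("visited"); // 10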

Example 31 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class IndexGeneratorCombinerTest, the method testMultipleRowsNotMerged verifies that rows whose dimension values differ (here, the "host" value) pass through the combiner individually instead of being rolled up.

@Test
public void testMultipleRowsNotMerged() throws Exception {
    long timestamp = System.currentTimeMillis();
    Bucket bucket = new Bucket(0, new DateTime(timestamp), 0);
    SortableBytes keySortableBytes = new SortableBytes(bucket.toGroupKey(), new byte[0]);
    BytesWritable key = keySortableBytes.toBytesWritable();
    // Two rows share a timestamp and dimension list but differ in "host",
    // so the combiner must not merge them.
    InputRow row1 = new MapBasedInputRow(
        timestamp,
        ImmutableList.<String>of("host", "keywords"),
        ImmutableMap.<String, Object>of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10)
    );
    InputRow row2 = new MapBasedInputRow(
        timestamp,
        ImmutableList.<String>of("host", "keywords"),
        ImmutableMap.<String, Object>of("host", "host2", "keywords", Arrays.asList("foo", "bar"), "visited", 5)
    );
    List<BytesWritable> rows = Lists.newArrayList(
        new BytesWritable(InputRowSerde.toBytes(row1, aggregators, true)),
        new BytesWritable(InputRowSerde.toBytes(row2, aggregators, true))
    );
    Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class);
    Capture<BytesWritable> captureKey1 = Capture.newInstance();
    Capture<BytesWritable> captureVal1 = Capture.newInstance();
    Capture<BytesWritable> captureKey2 = Capture.newInstance();
    Capture<BytesWritable> captureVal2 = Capture.newInstance();
    context.write(EasyMock.capture(captureKey1), EasyMock.capture(captureVal1));
    context.write(EasyMock.capture(captureKey2), EasyMock.capture(captureVal2));
    EasyMock.replay(context);
    combiner.reduce(key, rows, context);
    EasyMock.verify(context);
    Assert.assertSame(key, captureKey1.getValue());
    Assert.assertSame(key, captureKey2.getValue());
    InputRow capturedRow1 = InputRowSerde.fromBytes(captureVal1.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow1.getDimensions());
    Assert.assertEquals(Arrays.asList("host1"), capturedRow1.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow1.getDimension("keywords"));
    Assert.assertEquals(10, capturedRow1.getLongMetric("visited_sum"));
    Assert.assertEquals(1.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow1.getRaw("unique_hosts")), 0.001);
    InputRow capturedRow2 = InputRowSerde.fromBytes(captureVal2.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow2.getDimensions());
    Assert.assertEquals(Arrays.asList("host2"), capturedRow2.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow2.getDimension("keywords"));
    Assert.assertEquals(5, capturedRow2.getLongMetric("visited_sum"));
    Assert.assertEquals(1.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow2.getRaw("unique_hosts")), 0.001);
}
Also used : MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) BytesWritable(org.apache.hadoop.io.BytesWritable) Reducer(org.apache.hadoop.mapreduce.Reducer) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Example 32 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class IndexGeneratorCombinerTest, the method testMultipleRowsMerged verifies that rows sharing a timestamp and identical dimension values are rolled up into one output row: visited_sum becomes 10 + 5 = 15, and the unique_hosts HyperLogLog estimate lands near 2.0 because two distinct hosts were fed to the aggregator.

@Test
public void testMultipleRowsMerged() throws Exception {
    long timestamp = System.currentTimeMillis();
    Bucket bucket = new Bucket(0, new DateTime(timestamp), 0);
    SortableBytes keySortableBytes = new SortableBytes(bucket.toGroupKey(), new byte[0]);
    BytesWritable key = keySortableBytes.toBytesWritable();
    // Same timestamp and a single shared dimension ("keywords") with identical
    // values, so the combiner rolls these two rows up into one.
    InputRow row1 = new MapBasedInputRow(
        timestamp,
        ImmutableList.<String>of("keywords"),
        ImmutableMap.<String, Object>of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10)
    );
    InputRow row2 = new MapBasedInputRow(
        timestamp,
        ImmutableList.<String>of("keywords"),
        ImmutableMap.<String, Object>of("host", "host2", "keywords", Arrays.asList("foo", "bar"), "visited", 5)
    );
    List<BytesWritable> rows = Lists.newArrayList(
        new BytesWritable(InputRowSerde.toBytes(row1, aggregators, true)),
        new BytesWritable(InputRowSerde.toBytes(row2, aggregators, true))
    );
    Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class);
    Capture<BytesWritable> captureKey = Capture.newInstance();
    Capture<BytesWritable> captureVal = Capture.newInstance();
    context.write(EasyMock.capture(captureKey), EasyMock.capture(captureVal));
    EasyMock.replay(context);
    combiner.reduce(key, rows, context);
    EasyMock.verify(context);
    Assert.assertSame(key, captureKey.getValue());
    InputRow capturedRow = InputRowSerde.fromBytes(captureVal.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow.getDimensions());
    Assert.assertEquals(ImmutableList.of(), capturedRow.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow.getDimension("keywords"));
    Assert.assertEquals(15, capturedRow.getLongMetric("visited_sum"));
    Assert.assertEquals(2.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow.getRaw("unique_hosts")), 0.001);
}
Also used : MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) BytesWritable(org.apache.hadoop.io.BytesWritable) Reducer(org.apache.hadoop.mapreduce.Reducer) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Example 33 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class InputRowSerdeTest, the method testThrowParseExceptions verifies that serialization throws a ParseException when an aggregator input cannot be parsed (a String fed to a long-sum aggregator) and the reportParseExceptions flag is true.

@Test(expected = ParseException.class)
public void testThrowParseExceptions() {
    InputRow in = new MapBasedInputRow(timestamp, dims, event);
    AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
        new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
        new DoubleSumAggregatorFactory("m1out", "m1"),
        new LongSumAggregatorFactory("m2out", "m2"),
        new HyperUniquesAggregatorFactory("m3out", "m3"),
        // Unparseable from String to Long
        new LongSumAggregatorFactory("unparseable", "m3")
    };
    InputRowSerde.toBytes(in, aggregatorFactories, true);
}
Also used : DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Test(org.junit.Test)
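
For contrast, a hedged sketch (not from the test suite) of the lenient path, assuming the final boolean of toBytes is the same reportParseExceptions flag exercised above: with it set to false, the unparseable metric should be skipped rather than aborting serialization.

    // Hedged sketch: with reportParseExceptions == false, toBytes() is expected
    // to tolerate the String -> long parse failure instead of throwing, so the
    // round trip below should still complete.
    byte[] bytes = InputRowSerde.toBytes(in, aggregatorFactories, false);
    InputRow out = InputRowSerde.fromBytes(bytes, aggregatorFactories);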

Example 34 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class ReplayableFirehoseFactoryTest, the method testReplayableFirehoseWithMultipleFiles verifies that every row read through the replayable firehose, including a large repeated batch spilled across multiple temp files, can be replayed identically on a second connect.

@Test
public void testReplayableFirehoseWithMultipleFiles() throws Exception {
    // The small numeric limits passed here (presumably a temp-file size limit
    // and a retry count) force rows to spill across multiple temp files,
    // which is what this test exercises.
    replayableFirehoseFactory = new ReplayableFirehoseFactory(delegateFactory, false, 1, 3, mapper);
    final boolean[] hasMore = { true };
    final int multiplicationFactor = 500;
    final InputRow finalRow = new MapBasedInputRow(
        DateTime.now(),
        Lists.newArrayList("dim4", "dim5"),
        ImmutableMap.<String, Object>of("dim4", "val12", "dim5", "val20", "met1", 30)
    );
    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andAnswer(new IAnswer<Boolean>() {

        @Override
        public Boolean answer() throws Throwable {
            return hasMore[0];
        }
    }).anyTimes();
    expect(delegateFirehose.nextRow())
        .andReturn(testRows.get(0)).times(multiplicationFactor)
        .andReturn(testRows.get(1)).times(multiplicationFactor)
        .andReturn(testRows.get(2)).times(multiplicationFactor)
        .andAnswer(new IAnswer<InputRow>() {

        @Override
        public InputRow answer() throws Throwable {
            hasMore[0] = false;
            return finalRow;
        }
    });
    delegateFirehose.close();
    replayAll();
    List<InputRow> testRowsMultiplied = Lists.newArrayList();
    for (InputRow row : testRows) {
        for (int i = 0; i < multiplicationFactor; i++) {
            testRowsMultiplied.add(row);
        }
    }
    testRowsMultiplied.add(finalRow);
    List<InputRow> rows = Lists.newArrayList();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
        while (firehose.hasMore()) {
            rows.add(firehose.nextRow());
        }
    }
    Assert.assertEquals(testRowsMultiplied, rows);
    // now replay!
    rows.clear();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
        while (firehose.hasMore()) {
            rows.add(firehose.nextRow());
        }
    }
    Assert.assertEquals(testRowsMultiplied, rows);
    verifyAll();
}
Also used : IAnswer(org.easymock.IAnswer) Firehose(io.druid.data.input.Firehose) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) Test(org.junit.Test)

Example 35 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

From the class SearchQueryRunnerTest, the method testSearchWithNullValueInDimension verifies that a search over a dimension missing from some rows returns both the real value and an empty-string hit for the rows where the dimension is null.

@Test
public void testSearchWithNullValueInDimension() throws Exception {
    IncrementalIndex<Aggregator> index = new OnheapIncrementalIndex(
        new IncrementalIndexSchema.Builder()
            .withQueryGranularity(Granularities.NONE)
            .withMinTimestamp(new DateTime("2011-01-12T00:00:00.000Z").getMillis())
            .build(),
        true,
        10
    );
    // Only the first row carries "host" and only the second carries "table",
    // so each dimension is null (empty) for the other row.
    index.add(new MapBasedInputRow(
        1481871600000L,
        Arrays.asList("name", "host"),
        ImmutableMap.<String, Object>of("name", "name1", "host", "host")
    ));
    index.add(new MapBasedInputRow(
        1481871670000L,
        Arrays.asList("name", "table"),
        ImmutableMap.<String, Object>of("name", "name2", "table", "table")
    ));
    SearchQuery searchQuery = Druids.newSearchQueryBuilder()
        .dimensions(new DefaultDimensionSpec("table", "table"))
        .dataSource(QueryRunnerTestHelper.dataSource)
        .granularity(QueryRunnerTestHelper.allGran)
        .intervals(QueryRunnerTestHelper.fullOnInterval)
        .context(ImmutableMap.<String, Object>of("searchStrategy", "cursorOnly"))
        .build();
    QueryRunnerFactory factory = new SearchQueryRunnerFactory(selector, toolChest, QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    QueryRunner runner = factory.createRunner(new QueryableIndexSegment("asdf", TestIndex.persistRealtimeAndLoadMMapped(index)));
    List<SearchHit> expectedHits = Lists.newLinkedList();
    expectedHits.add(new SearchHit("table", "table", 1));
    expectedHits.add(new SearchHit("table", "", 1));
    checkSearchQuery(searchQuery, runner, expectedHits);
}
Also used : SearchQuery(io.druid.query.search.search.SearchQuery) QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) SearchHit(io.druid.query.search.search.SearchHit) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) Aggregator(io.druid.query.aggregation.Aggregator) DateTime(org.joda.time.DateTime) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) QueryRunner(io.druid.query.QueryRunner) QueryRunnerFactory(io.druid.query.QueryRunnerFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Test(org.junit.Test)
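
The empty-string hit asserted above comes from the row that simply lacks the "table" dimension; a small sketch of how MapBasedInputRow reports such an absent dimension (reusing the first row from the test):

    InputRow rowWithoutTable = new MapBasedInputRow(
        1481871600000L,
        Arrays.asList("name", "host"),
        ImmutableMap.<String, Object>of("name", "name1", "host", "host")
    );
    // An absent dimension comes back as an empty list, which the cursor-only
    // search strategy counts under the "" (null) value.
    rowWithoutTable.getDimension("table"); // -> []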

Aggregations

MapBasedInputRow (io.druid.data.input.MapBasedInputRow) 73
Test (org.junit.Test) 51
DateTime (org.joda.time.DateTime) 38
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex) 32
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex) 30
File (java.io.File) 19
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory) 13
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory) 12
InputRow (io.druid.data.input.InputRow) 11
IncrementalIndexTest (io.druid.segment.data.IncrementalIndexTest) 11
Interval (org.joda.time.Interval) 11
IOException (java.io.IOException) 10
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec) 9
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory) 8
Row (io.druid.data.input.Row) 7
TaskStatus (io.druid.indexing.common.TaskStatus) 7
TaskToolbox (io.druid.indexing.common.TaskToolbox) 7
TestIndexerMetadataStorageCoordinator (io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) 7
SpatialDimensionSchema (io.druid.data.input.impl.SpatialDimensionSchema) 6
Pair (io.druid.java.util.common.Pair) 6