
Example 41 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class IndexGeneratorCombinerTest, method testMultipleRowsNotMerged:

@Test
public void testMultipleRowsNotMerged() throws Exception {
    long timestamp = System.currentTimeMillis();
    Bucket bucket = new Bucket(0, new DateTime(timestamp), 0);
    SortableBytes keySortableBytes = new SortableBytes(bucket.toGroupKey(), new byte[0]);
    BytesWritable key = keySortableBytes.toBytesWritable();
    InputRow row1 = new MapBasedInputRow(
        timestamp,
        ImmutableList.<String>of("host", "keywords"),
        ImmutableMap.<String, Object>of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10)
    );
    InputRow row2 = new MapBasedInputRow(
        timestamp,
        ImmutableList.<String>of("host", "keywords"),
        ImmutableMap.<String, Object>of("host", "host2", "keywords", Arrays.asList("foo", "bar"), "visited", 5)
    );
    List<BytesWritable> rows = Lists.newArrayList(
        new BytesWritable(InputRowSerde.toBytes(row1, aggregators, true)),
        new BytesWritable(InputRowSerde.toBytes(row2, aggregators, true))
    );
    Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class);
    Capture<BytesWritable> captureKey1 = Capture.newInstance();
    Capture<BytesWritable> captureVal1 = Capture.newInstance();
    Capture<BytesWritable> captureKey2 = Capture.newInstance();
    Capture<BytesWritable> captureVal2 = Capture.newInstance();
    context.write(EasyMock.capture(captureKey1), EasyMock.capture(captureVal1));
    context.write(EasyMock.capture(captureKey2), EasyMock.capture(captureVal2));
    EasyMock.replay(context);
    combiner.reduce(key, rows, context);
    EasyMock.verify(context);
    Assert.assertTrue(captureKey1.getValue() == key);
    Assert.assertTrue(captureKey2.getValue() == key);
    InputRow capturedRow1 = InputRowSerde.fromBytes(captureVal1.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow1.getDimensions());
    Assert.assertEquals(Arrays.asList("host1"), capturedRow1.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow1.getDimension("keywords"));
    Assert.assertEquals(10, capturedRow1.getLongMetric("visited_sum"));
    Assert.assertEquals(1.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow1.getRaw("unique_hosts")), 0.001);
    InputRow capturedRow2 = InputRowSerde.fromBytes(captureVal2.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow2.getDimensions());
    Assert.assertEquals(Arrays.asList("host2"), capturedRow2.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow2.getDimension("keywords"));
    Assert.assertEquals(5, capturedRow2.getLongMetric("visited_sum"));
    Assert.assertEquals(1.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow2.getRaw("unique_hosts")), 0.001);
}
Also used : MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) BytesWritable(org.apache.hadoop.io.BytesWritable) Reducer(org.apache.hadoop.mapreduce.Reducer) DateTime(org.joda.time.DateTime) Test(org.junit.Test)
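The serde round trip this test depends on can also be sketched in isolation. The snippet below is an illustrative standalone program, not part of the Druid test suite: the single LongSumAggregatorFactory and the io.druid.indexer package for InputRowSerde are assumptions made for the example.

// Standalone sketch (assumed setup): serialize one MapBasedInputRow with InputRowSerde and read it back.
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import io.druid.indexer.InputRowSerde;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import java.util.Arrays;

public class InputRowSerdeRoundTrip {
    public static void main(String[] args) {
        // One long-sum aggregator, matching the "visited" -> "visited_sum" rollup used in the test.
        AggregatorFactory[] aggregators = new AggregatorFactory[] {
            new LongSumAggregatorFactory("visited_sum", "visited")
        };
        InputRow row = new MapBasedInputRow(
            System.currentTimeMillis(),
            ImmutableList.of("host", "keywords"),
            ImmutableMap.<String, Object>of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10)
        );
        byte[] bytes = InputRowSerde.toBytes(row, aggregators, true);
        InputRow back = InputRowSerde.fromBytes(bytes, aggregators);
        System.out.println(back.getDimension("host"));          // [host1]
        System.out.println(back.getLongMetric("visited_sum"));  // 10
    }
}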

Example 42 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class IndexGeneratorCombinerTest, method testMultipleRowsMerged:

@Test
public void testMultipleRowsMerged() throws Exception {
    long timestamp = System.currentTimeMillis();
    Bucket bucket = new Bucket(0, new DateTime(timestamp), 0);
    SortableBytes keySortableBytes = new SortableBytes(bucket.toGroupKey(), new byte[0]);
    BytesWritable key = keySortableBytes.toBytesWritable();
    InputRow row1 = new MapBasedInputRow(
        timestamp,
        ImmutableList.<String>of("keywords"),
        ImmutableMap.<String, Object>of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10)
    );
    InputRow row2 = new MapBasedInputRow(
        timestamp,
        ImmutableList.<String>of("keywords"),
        ImmutableMap.<String, Object>of("host", "host2", "keywords", Arrays.asList("foo", "bar"), "visited", 5)
    );
    List<BytesWritable> rows = Lists.newArrayList(
        new BytesWritable(InputRowSerde.toBytes(row1, aggregators, true)),
        new BytesWritable(InputRowSerde.toBytes(row2, aggregators, true))
    );
    Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class);
    Capture<BytesWritable> captureKey = Capture.newInstance();
    Capture<BytesWritable> captureVal = Capture.newInstance();
    context.write(EasyMock.capture(captureKey), EasyMock.capture(captureVal));
    EasyMock.replay(context);
    combiner.reduce(key, rows, context);
    EasyMock.verify(context);
    Assert.assertTrue(captureKey.getValue() == key);
    InputRow capturedRow = InputRowSerde.fromBytes(captureVal.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow.getDimensions());
    Assert.assertEquals(ImmutableList.of(), capturedRow.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow.getDimension("keywords"));
    Assert.assertEquals(15, capturedRow.getLongMetric("visited_sum"));
    Assert.assertEquals(2.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow.getRaw("unique_hosts")), 0.001);
}
Also used : MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) BytesWritable(org.apache.hadoop.io.BytesWritable) Reducer(org.apache.hadoop.mapreduce.Reducer) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Example 43 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class InputRowSerdeTest, method testThrowParseExceptions:

@Test(expected = ParseException.class)
public void testThrowParseExceptions() {
    InputRow in = new MapBasedInputRow(timestamp, dims, event);
    AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
        new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
        new DoubleSumAggregatorFactory("m1out", "m1"),
        new LongSumAggregatorFactory("m2out", "m2"),
        new HyperUniquesAggregatorFactory("m3out", "m3"),
        // "m3" holds a String value that is unparseable as a Long, so serializing with
        // reportParseExceptions=true must throw the expected ParseException
        new LongSumAggregatorFactory("unparseable", "m3")
    };
    InputRowSerde.toBytes(in, aggregatorFactories, true);
}
Also used : DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Test(org.junit.Test)
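The boolean passed as the last argument to InputRowSerde.toBytes is the reportParseExceptions switch used throughout these examples. A hypothetical companion test is sketched below, assuming (as the flag name suggests) that passing false suppresses the parse error instead of propagating it; the fixtures are illustrative only and imports are as in the example above.

@Test
public void testSwallowParseExceptions() {
    // Hypothetical fixtures; the real test class builds timestamp, dims and event in its setup.
    InputRow in = new MapBasedInputRow(
        System.currentTimeMillis(),
        ImmutableList.of("d1"),
        ImmutableMap.<String, Object>of("d1", "d1v", "m3", "not-a-number")
    );
    AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
        new LongSumAggregatorFactory("unparseable", "m3")
    };
    // With reportParseExceptions=false this is assumed to skip the bad metric
    // rather than throw ParseException, so the call should complete normally.
    InputRowSerde.toBytes(in, aggregatorFactories, false);
}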

Example 44 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class BatchDeltaIngestionTest, method verifyRows:

private void verifyRows(List<ImmutableMap<String, Object>> expectedRows, List<InputRow> actualRows) {
    System.out.println("actualRows = " + actualRows);
    Assert.assertEquals(expectedRows.size(), actualRows.size());
    for (int i = 0; i < expectedRows.size(); i++) {
        Map<String, Object> expected = expectedRows.get(i);
        InputRow actual = actualRows.get(i);
        Assert.assertEquals(ImmutableList.of("host"), actual.getDimensions());
        Assert.assertEquals(expected.get("time"), actual.getTimestamp());
        Assert.assertEquals(expected.get("host"), actual.getDimension("host"));
        Assert.assertEquals(expected.get("visited_sum"), actual.getLongMetric("visited_sum"));
        Assert.assertEquals((Double) expected.get("unique_hosts"), (Double) HyperUniquesAggregatorFactory.estimateCardinality(actual.getRaw("unique_hosts")), 0.001);
    }
}
Also used : InputRow(io.druid.data.input.InputRow)
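From the assertions above, each expected-row map must carry the keys "time" (a DateTime), "host" (a dimension value list), "visited_sum" (a Long) and "unique_hosts" (a Double cardinality). A hypothetical caller could build that input as sketched below; the values are purely illustrative and the imports follow the surrounding examples.

// Illustrative only: one expected row carrying the keys that verifyRows reads.
List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
    ImmutableMap.<String, Object>of(
        "time", new DateTime("2014-10-22T00:00:00.000Z"),
        "host", ImmutableList.of("a.example.com"),
        "visited_sum", 100L,
        "unique_hosts", 1.0d
    )
);
verifyRows(expectedRows, actualRows); // actualRows would come from reading the ingested segment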

Example 45 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class SinkTest, method testSwap:

@Test
public void testSwap() throws Exception {
    final DataSchema schema = new DataSchema(
        "test",
        null,
        new AggregatorFactory[] { new CountAggregatorFactory("rows") },
        new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
        new DefaultObjectMapper()
    );
    final Interval interval = new Interval("2013-01-01/2013-01-02");
    final String version = new DateTime().toString();
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
        100, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null
    );
    final Sink sink = new Sink(
        interval,
        schema,
        tuningConfig.getShardSpec(),
        version,
        tuningConfig.getMaxRowsInMemory(),
        tuningConfig.isReportParseExceptions()
    );
    sink.add(new InputRow() {

        @Override
        public List<String> getDimensions() {
            return Lists.newArrayList();
        }

        @Override
        public long getTimestampFromEpoch() {
            return new DateTime("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return new DateTime("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
            return Lists.newArrayList();
        }

        @Override
        public float getFloatMetric(String metric) {
            return 0;
        }

        @Override
        public long getLongMetric(String metric) {
            return 0L;
        }

        @Override
        public Object getRaw(String dimension) {
            return null;
        }

        @Override
        public int compareTo(Row o) {
            return 0;
        }
    });
    FireHydrant currHydrant = sink.getCurrHydrant();
    Assert.assertEquals(new Interval("2013-01-01/PT1M"), currHydrant.getIndex().getInterval());
    FireHydrant swapHydrant = sink.swap();
    sink.add(new InputRow() {

        @Override
        public List<String> getDimensions() {
            return Lists.newArrayList();
        }

        @Override
        public long getTimestampFromEpoch() {
            return new DateTime("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return new DateTime("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
            return Lists.newArrayList();
        }

        @Override
        public float getFloatMetric(String metric) {
            return 0;
        }

        @Override
        public long getLongMetric(String metric) {
            return 0L;
        }

        @Override
        public Object getRaw(String dimension) {
            return null;
        }

        @Override
        public int compareTo(Row o) {
            return 0;
        }
    });
    Assert.assertEquals(currHydrant, swapHydrant);
    Assert.assertNotSame(currHydrant, sink.getCurrHydrant());
    Assert.assertEquals(new Interval("2013-01-01/PT1M"), sink.getCurrHydrant().getIndex().getInterval());
    Assert.assertEquals(2, Iterators.size(sink.iterator()));
}
Also used : Period(org.joda.time.Period) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) DateTime(org.joda.time.DateTime) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) InputRow(io.druid.data.input.InputRow) List(java.util.List) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) Row(io.druid.data.input.Row) FireHydrant(io.druid.segment.realtime.FireHydrant) Interval(org.joda.time.Interval) Test(org.junit.Test)
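Both anonymous InputRow instances in the test above exist only to push a row into the sink; every method returns an empty or zero value except the timestamp. A shorter stub is sketched below. It assumes a MapBasedInputRow with an empty dimension list and empty event map is acceptable to Sink.add, which is plausible here because the schema's only aggregator is a row count, but is not something the test itself verifies; imports follow the surrounding examples.

// Hypothetical shorthand for the verbose anonymous InputRow used in the test.
InputRow stub = new MapBasedInputRow(
    new DateTime("2013-01-01").getMillis(),
    ImmutableList.<String>of(),
    ImmutableMap.<String, Object>of()
);
sink.add(stub);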

Aggregations

InputRow (io.druid.data.input.InputRow) 81
Test (org.junit.Test) 35
MapBasedInputRow (io.druid.data.input.MapBasedInputRow) 24
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator) 22
File (java.io.File) 18
Setup (org.openjdk.jmh.annotations.Setup) 15
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde) 14
Firehose (io.druid.data.input.Firehose) 12
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex) 12
IndexSpec (io.druid.segment.IndexSpec) 11
ArrayList (java.util.ArrayList) 11
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex) 10
DateTime (org.joda.time.DateTime) 10
QueryableIndex (io.druid.segment.QueryableIndex) 9
IOException (java.io.IOException) 9
BenchmarkColumnSchema (io.druid.benchmark.datagen.BenchmarkColumnSchema) 8
Interval (org.joda.time.Interval) 8
ParseException (io.druid.java.util.common.parsers.ParseException) 7
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory) 6
DataSegment (io.druid.timeline.DataSegment) 5