Example 31 with MapBasedRow

Use of io.druid.data.input.MapBasedRow in project druid by druid-io.

From the class IncrementalIndexStorageAdapterTest, method testObjectColumnSelectorOnVaryingColumnSchema.

@Test
public void testObjectColumnSelectorOnVaryingColumnSchema() throws Exception {
    IncrementalIndex index = indexCreator.createIndex();
    index.add(new MapBasedInputRow(new DateTime("2014-09-01T00:00:00"), Lists.newArrayList("billy"), ImmutableMap.<String, Object>of("billy", "hi")));
    index.add(new MapBasedInputRow(new DateTime("2014-09-01T01:00:00"), Lists.newArrayList("billy", "sally"), ImmutableMap.<String, Object>of("billy", "hip", "sally", "hop")));
    GroupByQueryEngine engine = makeGroupByQueryEngine();
    final Sequence<Row> rows = engine.process(
        GroupByQuery.builder()
                    .setDataSource("test")
                    .setGranularity(Granularities.ALL)
                    .setInterval(new Interval(0, new DateTime().getMillis()))
                    .addDimension("billy")
                    .addDimension("sally")
                    .addAggregator(new LongSumAggregatorFactory("cnt", "cnt"))
                    .addAggregator(new JavaScriptAggregatorFactory(
                        "fieldLength",
                        Arrays.asList("sally", "billy"),
                        "function(current, s, b) { return current + (s == null ? 0 : s.length) + (b == null ? 0 : b.length); }",
                        "function() { return 0; }",
                        "function(a,b) { return a + b; }",
                        JavaScriptConfig.getEnabledInstance()
                    ))
                    .build(),
        new IncrementalIndexStorageAdapter(index)
    );
    final ArrayList<Row> results = Sequences.toList(rows, Lists.<Row>newArrayList());
    Assert.assertEquals(2, results.size());
    MapBasedRow row = (MapBasedRow) results.get(0);
    Assert.assertEquals(ImmutableMap.of("billy", "hi", "cnt", 1L, "fieldLength", 2.0), row.getEvent());
    row = (MapBasedRow) results.get(1);
    Assert.assertEquals(ImmutableMap.of("billy", "hip", "sally", "hop", "cnt", 1L, "fieldLength", 6.0), row.getEvent());
}
Also used : LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) JavaScriptAggregatorFactory(io.druid.query.aggregation.JavaScriptAggregatorFactory) DateTime(org.joda.time.DateTime) MapBasedRow(io.druid.data.input.MapBasedRow) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Row(io.druid.data.input.Row) GroupByQueryEngine(io.druid.query.groupby.GroupByQueryEngine) Interval(org.joda.time.Interval) Test(org.junit.Test)
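For context, the assertions above go through MapBasedRow's accessors. Below is a minimal standalone sketch (not part of the test) of constructing a MapBasedRow directly and reading it back:

// Minimal sketch: a MapBasedRow is a timestamp plus an event map;
// getEvent() exposes the map that the assertions above compare against.
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.MapBasedRow;
import org.joda.time.DateTime;

public class MapBasedRowSketch {
    public static void main(String[] args) {
        MapBasedRow row = new MapBasedRow(
            new DateTime("2014-09-01T00:00:00"),
            ImmutableMap.<String, Object>of("billy", "hi", "cnt", 1L));
        System.out.println(row.getEvent());            // {billy=hi, cnt=1}
        System.out.println(row.getDimension("billy")); // [hi]
        System.out.println(row.getTimestamp());        // 2014-09-01T00:00:00.000 in the default zone
    }
}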

Example 32 with MapBasedRow

Use of io.druid.data.input.MapBasedRow in project druid by druid-io.

From the class IncrementalIndexStorageAdapterTest, method testSanity.

@Test
public void testSanity() throws Exception {
    IncrementalIndex index = indexCreator.createIndex();
    index.add(new MapBasedInputRow(new DateTime().minus(1).getMillis(), Lists.newArrayList("billy"), ImmutableMap.<String, Object>of("billy", "hi")));
    index.add(new MapBasedInputRow(new DateTime().minus(1).getMillis(), Lists.newArrayList("sally"), ImmutableMap.<String, Object>of("sally", "bo")));
    GroupByQueryEngine engine = makeGroupByQueryEngine();
    final Sequence<Row> rows = engine.process(
        GroupByQuery.builder()
                    .setDataSource("test")
                    .setGranularity(Granularities.ALL)
                    .setInterval(new Interval(0, new DateTime().getMillis()))
                    .addDimension("billy")
                    .addDimension("sally")
                    .addAggregator(new LongSumAggregatorFactory("cnt", "cnt"))
                    .build(),
        new IncrementalIndexStorageAdapter(index)
    );
    final ArrayList<Row> results = Sequences.toList(rows, Lists.<Row>newArrayList());
    Assert.assertEquals(2, results.size());
    MapBasedRow row = (MapBasedRow) results.get(0);
    Assert.assertEquals(ImmutableMap.of("sally", "bo", "cnt", 1L), row.getEvent());
    row = (MapBasedRow) results.get(1);
    Assert.assertEquals(ImmutableMap.of("billy", "hi", "cnt", 1L), row.getEvent());
}
Also used : LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DateTime(org.joda.time.DateTime) MapBasedRow(io.druid.data.input.MapBasedRow) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Row(io.druid.data.input.Row) GroupByQueryEngine(io.druid.query.groupby.GroupByQueryEngine) Interval(org.joda.time.Interval) Test(org.junit.Test)
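On the input side, testSanity feeds the index MapBasedInputRow instances, which pair an event map with the list of field names the index should treat as dimensions. A minimal sketch, independent of the test harness:

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import io.druid.data.input.MapBasedInputRow;
import org.joda.time.DateTime;

public class MapBasedInputRowSketch {
    public static void main(String[] args) {
        // The second argument names the fields to index as dimensions.
        MapBasedInputRow inputRow = new MapBasedInputRow(
            new DateTime().minus(1).getMillis(),
            Lists.newArrayList("sally"),
            ImmutableMap.<String, Object>of("sally", "bo"));
        System.out.println(inputRow.getDimensions());       // [sally]
        System.out.println(inputRow.getDimension("sally")); // [bo]
    }
}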

Example 33 with MapBasedRow

Use of io.druid.data.input.MapBasedRow in project druid by druid-io.

From the class TimestampGroupByAggregationTest, method testSimpleDataIngestionAndGroupByTest.

@Test
public void testSimpleDataIngestionAndGroupByTest() throws Exception {
    String recordParser = "{\n" + "  \"type\": \"string\",\n" + "  \"parseSpec\": {\n" + "    \"format\": \"tsv\",\n" + "    \"timestampSpec\": {\n" + "      \"column\": \"timestamp\",\n" + "      \"format\": \"auto\"\n" + "    },\n" + "    \"dimensionsSpec\": {\n" + "      \"dimensions\": [\n" + "        \"product\"\n" + "      ],\n" + "      \"dimensionExclusions\": [],\n" + "      \"spatialDimensions\": []\n" + "    },\n" + "    \"columns\": [\n" + "      \"timestamp\",\n" + "      \"cat\",\n" + "      \"product\",\n" + "      \"prefer\",\n" + "      \"prefer2\",\n" + "      \"pty_country\"\n" + "    ]\n" + "  }\n" + "}";
    String aggregator = "[\n" + "  {\n" + "    \"type\": \"" + aggType + "\",\n" + "    \"name\": \"" + aggField + "\",\n" + "    \"fieldName\": \"timestamp\"\n" + "  }\n" + "]";
    String groupBy = "{\n" + "  \"queryType\": \"groupBy\",\n" + "  \"dataSource\": \"test_datasource\",\n" + "  \"granularity\": \"MONTH\",\n" + "  \"dimensions\": [\"product\"],\n" + "  \"aggregations\": [\n" + "    {\n" + "      \"type\": \"" + aggType + "\",\n" + "      \"name\": \"" + groupByField + "\",\n" + "      \"fieldName\": \"" + aggField + "\"\n" + "    }\n" + "  ],\n" + "  \"intervals\": [\n" + "    \"2011-01-01T00:00:00.000Z/2011-05-01T00:00:00.000Z\"\n" + "  ]\n" + "}";
    ZipFile zip = new ZipFile(new File(this.getClass().getClassLoader().getResource("druid.sample.tsv.zip").toURI()));
    Sequence<Row> seq = helper.createIndexAndRunQueryOnSegment(zip.getInputStream(zip.getEntry("druid.sample.tsv")), recordParser, aggregator, 0, Granularities.MONTH, 100, groupBy);
    List<Row> results = Sequences.toList(seq, Lists.<Row>newArrayList());
    Assert.assertEquals(36, results.size());
    Assert.assertEquals(expected, ((MapBasedRow) results.get(0)).getEvent().get(groupByField));
}
Also used : MapBasedRow(io.druid.data.input.MapBasedRow) ZipFile(java.util.zip.ZipFile) Row(io.druid.data.input.Row) File(java.io.File) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)
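The fixture plumbing above is plain java.util.zip. A self-contained sketch of the same read pattern; the resource and entry names here are placeholders, not the test's actual fixture:

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

public class ZipFixtureSketch {
    public static void main(String[] args) throws Exception {
        // Locate a zipped fixture on the classpath (placeholder names).
        File zipped = new File(ZipFixtureSketch.class.getClassLoader()
            .getResource("sample.tsv.zip").toURI());
        try (ZipFile zip = new ZipFile(zipped)) {
            ZipEntry entry = zip.getEntry("sample.tsv");
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(zip.getInputStream(entry), StandardCharsets.UTF_8))) {
                System.out.println(reader.readLine()); // first TSV line of the fixture
            }
        }
    }
}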

Example 34 with MapBasedRow

Use of io.druid.data.input.MapBasedRow in project hive by apache.

From the class DruidGroupByQueryRecordReader, method nextKeyValue.

@Override
public boolean nextKeyValue() {
    if (queryResultsIterator.hasNext()) {
        final Row row = queryResultsIterator.next();
        // Druid currently supports only MapBasedRow through its Jackson serde, so it is safe to cast without a check.
        currentRow = (MapBasedRow) row;
        // @TODO move this out of here to org.apache.hadoop.hive.druid.serde.DruidSerDe
        currentEvent = Maps.transformEntries(currentRow.getEvent(), (key, value1) -> {
            if (timeExtractionFields.contains(key)) {
                return ISODateTimeFormat.dateTimeParser().parseMillis((String) value1);
            }
            if (intFormattedTimeExtractionFields.contains(key)) {
                return Integer.valueOf((String) value1);
            }
            return value1;
        });
        return true;
    }
    return false;
}
Also used : ISODateTimeFormat(org.joda.time.format.ISODateTimeFormat) NullWritable(org.apache.hadoop.io.NullWritable) InputSplit(org.apache.hadoop.mapreduce.InputSplit) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DimensionSpec(io.druid.query.dimension.DimensionSpec) IOException(java.io.IOException) GroupByQuery(io.druid.query.groupby.GroupByQuery) Collectors(java.util.stream.Collectors) Maps(com.google.common.collect.Maps) List(java.util.List) Lists(com.google.common.collect.Lists) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) ISO_TIME_FORMAT(org.apache.hadoop.hive.druid.serde.DruidSerDeUtils.ISO_TIME_FORMAT) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) JavaType(com.fasterxml.jackson.databind.JavaType) DruidStorageHandlerUtils(org.apache.hadoop.hive.druid.DruidStorageHandlerUtils) TypeReference(com.fasterxml.jackson.core.type.TypeReference) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) HttpClient(com.metamx.http.client.HttpClient)
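The map rewrite above uses Guava's Maps.transformEntries, which returns a lazy view: the transformer runs on every lookup rather than once up front. A minimal sketch of the idiom; the field names below are illustrative, whereas the real reader derives them from the query:

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import java.util.Map;
import java.util.Set;
import org.joda.time.format.ISODateTimeFormat;

public class TransformEntriesSketch {
    public static void main(String[] args) {
        // Illustrative time-extraction field set (placeholder).
        Set<String> timeFields = ImmutableSet.of("__time");
        Map<String, Object> event = ImmutableMap.<String, Object>of(
            "__time", "2011-01-01T00:00:00.000Z",
            "product", "mezzanine");
        // transformEntries returns a view, so the lambda re-runs on each lookup;
        // copy the result into a new map if the transformation is expensive.
        Map<String, Object> transformed = Maps.transformEntries(event, (key, value) -> {
            if (timeFields.contains(key)) {
                return ISODateTimeFormat.dateTimeParser().parseMillis((String) value);
            }
            return value;
        });
        System.out.println(transformed.get("__time"));  // 1293840000000
        System.out.println(transformed.get("product")); // mezzanine
    }
}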

Aggregations

MapBasedRow (io.druid.data.input.MapBasedRow): 34 uses
Test (org.junit.Test): 21 uses
Row (io.druid.data.input.Row): 16 uses
GroupByQueryRunnerTest (io.druid.query.groupby.GroupByQueryRunnerTest): 12 uses
DateTime (org.joda.time.DateTime): 11 uses
File (java.io.File): 9 uses
Sequence (io.druid.java.util.common.guava.Sequence): 7 uses
Function (com.google.common.base.Function): 6 uses
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 4 uses
AggregationTestHelper (io.druid.query.aggregation.AggregationTestHelper): 4 uses
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 4 uses
DimensionSpec (io.druid.query.dimension.DimensionSpec): 4 uses
List (java.util.List): 4 uses
Map (java.util.Map): 4 uses
Interval (org.joda.time.Interval): 4 uses
AggregatorsModule (io.druid.jackson.AggregatorsModule): 3 uses
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 3 uses
SelectorDimFilter (io.druid.query.filter.SelectorDimFilter): 3 uses
GroupByQueryEngine (io.druid.query.groupby.GroupByQueryEngine): 3 uses
ISE (io.druid.java.util.common.ISE): 2 uses