Use of io.druid.data.input.MapBasedRow in project druid by druid-io.
In the class IncrementalIndexStorageAdapterTest, method testObjectColumnSelectorOnVaryingColumnSchema.
@Test
public void testObjectColumnSelectorOnVaryingColumnSchema() throws Exception {
  IncrementalIndex index = indexCreator.createIndex();
  index.add(new MapBasedInputRow(new DateTime("2014-09-01T00:00:00"), Lists.newArrayList("billy"), ImmutableMap.<String, Object>of("billy", "hi")));
  index.add(new MapBasedInputRow(new DateTime("2014-09-01T01:00:00"), Lists.newArrayList("billy", "sally"), ImmutableMap.<String, Object>of("billy", "hip", "sally", "hop")));
  GroupByQueryEngine engine = makeGroupByQueryEngine();
  final Sequence<Row> rows = engine.process(
      GroupByQuery.builder()
          .setDataSource("test")
          .setGranularity(Granularities.ALL)
          .setInterval(new Interval(0, new DateTime().getMillis()))
          .addDimension("billy")
          .addDimension("sally")
          .addAggregator(new LongSumAggregatorFactory("cnt", "cnt"))
          .addAggregator(new JavaScriptAggregatorFactory(
              "fieldLength",
              Arrays.asList("sally", "billy"),
              "function(current, s, b) { return current + (s == null ? 0 : s.length) + (b == null ? 0 : b.length); }",
              "function() { return 0; }",
              "function(a,b) { return a + b; }",
              JavaScriptConfig.getEnabledInstance()))
          .build(),
      new IncrementalIndexStorageAdapter(index));
  final ArrayList<Row> results = Sequences.toList(rows, Lists.<Row>newArrayList());
  Assert.assertEquals(2, results.size());
  MapBasedRow row = (MapBasedRow) results.get(0);
  Assert.assertEquals(ImmutableMap.of("billy", "hi", "cnt", 1L, "fieldLength", 2.0), row.getEvent());
  row = (MapBasedRow) results.get(1);
  Assert.assertEquals(ImmutableMap.of("billy", "hip", "sally", "hop", "cnt", 1L, "fieldLength", 6.0), row.getEvent());
}
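The assertions above read each result through MapBasedRow.getEvent(), which returns the row's event map. As a small, self-contained illustration of that class (the MapBasedRowSketch class name and the sample values are made up for this sketch; they are not part of the test):

import com.google.common.collect.ImmutableMap;
import io.druid.data.input.MapBasedRow;
import org.joda.time.DateTime;

public class MapBasedRowSketch {
  public static void main(String[] args) {
    // A MapBasedRow is simply a timestamp paired with a Map<String, Object> event.
    MapBasedRow row = new MapBasedRow(
        new DateTime("2014-09-01T00:00:00"),
        ImmutableMap.<String, Object>of("billy", "hi", "cnt", 1L));
    System.out.println(row.getTimestamp());        // the row timestamp as a DateTime
    System.out.println(row.getEvent());            // {billy=hi, cnt=1}
    System.out.println(row.getDimension("billy")); // [hi] -- dimension values as a List<String>
  }
}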
Use of io.druid.data.input.MapBasedRow in project druid by druid-io.
In the class IncrementalIndexStorageAdapterTest, method testSanity.
@Test
public void testSanity() throws Exception {
  IncrementalIndex index = indexCreator.createIndex();
  index.add(new MapBasedInputRow(new DateTime().minus(1).getMillis(), Lists.newArrayList("billy"), ImmutableMap.<String, Object>of("billy", "hi")));
  index.add(new MapBasedInputRow(new DateTime().minus(1).getMillis(), Lists.newArrayList("sally"), ImmutableMap.<String, Object>of("sally", "bo")));
  GroupByQueryEngine engine = makeGroupByQueryEngine();
  final Sequence<Row> rows = engine.process(
      GroupByQuery.builder()
          .setDataSource("test")
          .setGranularity(Granularities.ALL)
          .setInterval(new Interval(0, new DateTime().getMillis()))
          .addDimension("billy")
          .addDimension("sally")
          .addAggregator(new LongSumAggregatorFactory("cnt", "cnt"))
          .build(),
      new IncrementalIndexStorageAdapter(index));
  final ArrayList<Row> results = Sequences.toList(rows, Lists.<Row>newArrayList());
  Assert.assertEquals(2, results.size());
  MapBasedRow row = (MapBasedRow) results.get(0);
  Assert.assertEquals(ImmutableMap.of("sally", "bo", "cnt", 1L), row.getEvent());
  row = (MapBasedRow) results.get(1);
  Assert.assertEquals(ImmutableMap.of("billy", "hi", "cnt", 1L), row.getEvent());
}
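Both tests above call a makeGroupByQueryEngine() helper that is not shown in these snippets. A minimal sketch of what such a helper could look like, assuming this Druid version's GroupByQueryEngine(Supplier<GroupByQueryConfig>, StupidPool<ByteBuffer>) and StupidPool(String, Supplier<ByteBuffer>) constructors; the pool name, buffer size, and default config are illustrative:

// Sketch only: a GroupByQueryEngine needs a config supplier and an on-heap
// buffer pool for intermediate results. Constructor signatures are assumed
// for this Druid version, and the 50 KB buffer size is arbitrary.
private static GroupByQueryEngine makeGroupByQueryEngine() {
  return new GroupByQueryEngine(
      Suppliers.<GroupByQueryConfig>ofInstance(new GroupByQueryConfig()),
      new StupidPool<ByteBuffer>("GroupByQueryEngine-bufferPool", () -> ByteBuffer.allocate(50000)));
}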
Use of io.druid.data.input.MapBasedRow in project druid by druid-io.
In the class TimestampGroupByAggregationTest, method testSimpleDataIngestionAndGroupByTest.
@Test
public void testSimpleDataIngestionAndGroupByTest() throws Exception {
  String recordParser = "{\n"
      + " \"type\": \"string\",\n"
      + " \"parseSpec\": {\n"
      + " \"format\": \"tsv\",\n"
      + " \"timestampSpec\": {\n"
      + " \"column\": \"timestamp\",\n"
      + " \"format\": \"auto\"\n"
      + " },\n"
      + " \"dimensionsSpec\": {\n"
      + " \"dimensions\": [\n"
      + " \"product\"\n"
      + " ],\n"
      + " \"dimensionExclusions\": [],\n"
      + " \"spatialDimensions\": []\n"
      + " },\n"
      + " \"columns\": [\n"
      + " \"timestamp\",\n"
      + " \"cat\",\n"
      + " \"product\",\n"
      + " \"prefer\",\n"
      + " \"prefer2\",\n"
      + " \"pty_country\"\n"
      + " ]\n"
      + " }\n"
      + "}";
  String aggregator = "[\n"
      + " {\n"
      + " \"type\": \"" + aggType + "\",\n"
      + " \"name\": \"" + aggField + "\",\n"
      + " \"fieldName\": \"timestamp\"\n"
      + " }\n"
      + "]";
  String groupBy = "{\n"
      + " \"queryType\": \"groupBy\",\n"
      + " \"dataSource\": \"test_datasource\",\n"
      + " \"granularity\": \"MONTH\",\n"
      + " \"dimensions\": [\"product\"],\n"
      + " \"aggregations\": [\n"
      + " {\n"
      + " \"type\": \"" + aggType + "\",\n"
      + " \"name\": \"" + groupByField + "\",\n"
      + " \"fieldName\": \"" + aggField + "\"\n"
      + " }\n"
      + " ],\n"
      + " \"intervals\": [\n"
      + " \"2011-01-01T00:00:00.000Z/2011-05-01T00:00:00.000Z\"\n"
      + " ]\n"
      + "}";
  ZipFile zip = new ZipFile(new File(this.getClass().getClassLoader().getResource("druid.sample.tsv.zip").toURI()));
  Sequence<Row> seq = helper.createIndexAndRunQueryOnSegment(
      zip.getInputStream(zip.getEntry("druid.sample.tsv")),
      recordParser,
      aggregator,
      0,
      Granularities.MONTH,
      100,
      groupBy);
  List<Row> results = Sequences.toList(seq, Lists.<Row>newArrayList());
  Assert.assertEquals(36, results.size());
  Assert.assertEquals(expected, ((MapBasedRow) results.get(0)).getEvent().get(groupByField));
}
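Since every row returned by the helper is a MapBasedRow, the remaining rows can be inspected the same way; a sketch only, reusing the results list and the groupByField value from the test above:

// Sketch only: print the grouping dimension and the aggregated timestamp
// value carried in each row's event map.
for (Row r : results) {
  MapBasedRow mapRow = (MapBasedRow) r;
  System.out.println(mapRow.getDimension("product") + " -> " + mapRow.getEvent().get(groupByField));
}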
Use of io.druid.data.input.MapBasedRow in project hive by apache.
In the class DruidGroupByQueryRecordReader, method nextKeyValue.
@Override
public boolean nextKeyValue() {
  if (queryResultsIterator.hasNext()) {
    final Row row = queryResultsIterator.next();
    // Currently Druid only supports MapBasedRow for Jackson SerDe, so it should be safe to cast without a check.
    currentRow = (MapBasedRow) row;
    // @TODO move this out of here to org.apache.hadoop.hive.druid.serde.DruidSerDe
    currentEvent = Maps.transformEntries(currentRow.getEvent(), (key, value1) -> {
      if (timeExtractionFields.contains(key)) {
        return ISODateTimeFormat.dateTimeParser().parseMillis((String) value1);
      }
      if (intFormattedTimeExtractionFields.contains(key)) {
        return Integer.valueOf((String) value1);
      }
      return value1;
    });
    return true;
  }
  return false;
}
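The Guava Maps.transformEntries call above rewrites selected event values: ISO-formatted time columns become epoch milliseconds and integer-formatted ones become Integer objects, while everything else passes through unchanged. A self-contained sketch of the same pattern on a plain map; the field names, sample values, and the EventTransformSketch class are illustrative, not taken from the Hive reader:

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import org.joda.time.format.ISODateTimeFormat;

import java.util.Map;
import java.util.Set;

public class EventTransformSketch {
  public static void main(String[] args) {
    // Hypothetical column sets standing in for the reader's fields.
    Set<String> timeExtractionFields = ImmutableSet.of("extract_time");
    Set<String> intFormattedTimeExtractionFields = ImmutableSet.of("extract_year");
    Map<String, Object> event = ImmutableMap.<String, Object>of(
        "extract_time", "2014-09-01T00:00:00.000Z",
        "extract_year", "2014",
        "billy", "hi");
    // Same transformation pattern as the record reader: parse ISO timestamps
    // to epoch millis, parse integer-formatted extractions to Integer, and
    // pass everything else through unchanged.
    Map<String, Object> transformed = Maps.transformEntries(event, (key, value) -> {
      if (timeExtractionFields.contains(key)) {
        return ISODateTimeFormat.dateTimeParser().parseMillis((String) value);
      }
      if (intFormattedTimeExtractionFields.contains(key)) {
        return Integer.valueOf((String) value);
      }
      return value;
    });
    System.out.println(transformed); // {extract_time=1409529600000, extract_year=2014, billy=hi}
  }
}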