Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
The class MapInputRowParser, method parse.
@VisibleForTesting
static InputRow parse(
    TimestampSpec timestampSpec,
    DimensionsSpec dimensionsSpec,
    Map<String, Object> theMap
) throws ParseException
{
  final List<String> dimensionsToUse = findDimensions(dimensionsSpec, theMap);
  final DateTime timestamp;
  try {
    timestamp = timestampSpec.extractTimestamp(theMap);
  }
  catch (Exception e) {
    String rawMap = rawMapToPrint(theMap);
    throw new ParseException(
        rawMap,
        e,
        "Timestamp[%s] is unparseable! Event: %s",
        timestampSpec.getRawTimestamp(theMap),
        rawMap
    );
  }
  if (timestamp == null) {
    String rawMap = rawMapToPrint(theMap);
    throw new ParseException(
        rawMap,
        "Timestamp[%s] is unparseable! Event: %s",
        timestampSpec.getRawTimestamp(theMap),
        rawMap
    );
  }
  if (!Intervals.ETERNITY.contains(timestamp)) {
    String rawMap = rawMapToPrint(theMap);
    throw new ParseException(
        rawMap,
        "Encountered row with timestamp[%s] that cannot be represented as a long: [%s]",
        timestamp,
        rawMap
    );
  }
  return new MapBasedInputRow(timestamp, dimensionsToUse, theMap);
}
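A minimal call sketch (column names and values here are hypothetical and purely illustrative; it assumes the same imports as the snippet above, plus same-package access, since parse() is package-private and marked @VisibleForTesting):

TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
DimensionsSpec dimensionsSpec = new DimensionsSpec(
    DimensionsSpec.getDefaultSchemas(ImmutableList.of("page", "language"))
);
// Hypothetical event map; any key not named in dimensionsSpec or the
// timestamp column would be handled by findDimensions() above.
Map<String, Object> event = ImmutableMap.of(
    "time", "2016-03-04T00:00:00Z",
    "page", "Main_Page",
    "language", "en"
);
// Returns a MapBasedInputRow on success; throws ParseException when the
// timestamp is missing, unparseable, or out of range, per the guards above.
InputRow row = MapInputRowParser.parse(timestampSpec, dimensionsSpec, event);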
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
The class CsvReaderTest, method testQuotes.
@Test
public void testQuotes() throws IOException
{
  final ByteEntity source = writeData(
      ImmutableList.of(
          "3,\"Lets do some \"\"normal\"\" quotes\",2018-05-05T10:00:00Z",
          "34,\"Lets do some \"\"normal\"\", quotes with comma\",2018-05-06T10:00:00Z",
          "343,\"Lets try \\\"\"it\\\"\" with slash quotes\",2018-05-07T10:00:00Z",
          "545,\"Lets try \\\"\"it\\\"\", with slash quotes and comma\",2018-05-08T10:00:00Z",
          "65,Here I write \\n slash n,2018-05-09T10:00:00Z"
      )
  );
  final List<InputRow> expectedResults = ImmutableList.of(
      new MapBasedInputRow(
          DateTimes.of("2018-05-05T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "3", "Comment", "Lets do some \"normal\" quotes", "Timestamp", "2018-05-05T10:00:00Z")
      ),
      new MapBasedInputRow(
          DateTimes.of("2018-05-06T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "34", "Comment", "Lets do some \"normal\", quotes with comma", "Timestamp", "2018-05-06T10:00:00Z")
      ),
      new MapBasedInputRow(
          DateTimes.of("2018-05-07T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "343", "Comment", "Lets try \\\"it\\\" with slash quotes", "Timestamp", "2018-05-07T10:00:00Z")
      ),
      new MapBasedInputRow(
          DateTimes.of("2018-05-08T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "545", "Comment", "Lets try \\\"it\\\", with slash quotes and comma", "Timestamp", "2018-05-08T10:00:00Z")
      ),
      new MapBasedInputRow(
          DateTimes.of("2018-05-09T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "65", "Comment", "Here I write \\n slash n", "Timestamp", "2018-05-09T10:00:00Z")
      )
  );
  final CsvInputFormat format = new CsvInputFormat(
      ImmutableList.of("Value", "Comment", "Timestamp"),
      null,
      null,
      false,
      0
  );
  final InputEntityReader reader = format.createReader(
      new InputRowSchema(
          new TimestampSpec("Timestamp", "auto", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("Timestamp"))),
          ColumnsFilter.all()
      ),
      source,
      null
  );
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    final Iterator<InputRow> expectedRowIterator = expectedResults.iterator();
    while (iterator.hasNext()) {
      Assert.assertTrue(expectedRowIterator.hasNext());
      Assert.assertEquals(expectedRowIterator.next(), iterator.next());
    }
  }
}
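writeData() is a helper defined elsewhere in the test class. The convention being exercised is RFC 4180 quoting: inside a quoted field a double quote is escaped by doubling it, while a backslash has no special meaning and passes through verbatim, which is exactly what the expected Comment values show. A minimal sketch of the same read path that builds the entity directly instead (hypothetical row content):

// Hypothetical one-row input, escaped RFC 4180 style.
final ByteEntity source = new ByteEntity(
    StringUtils.toUtf8("3,\"a \"\"quoted\"\" word\",2018-05-05T10:00:00Z")
);
// Read with the same format and schema as the test above; the row parses to
// Value = 3, Comment = a "quoted" word, Timestamp = 2018-05-05T10:00:00Z.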
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
The class DistinctCountTopNQueryTest, method testTopNWithDistinctCountAgg.
@Test
public void testTopNWithDistinctCountAgg() throws Exception
{
  TopNQueryEngine engine = new TopNQueryEngine(pool);
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(
          new IncrementalIndexSchema.Builder()
              .withQueryGranularity(Granularities.SECOND)
              .withMetrics(new CountAggregatorFactory("cnt"))
              .build()
      )
      .setMaxRowCount(1000)
      .build();
  String visitor_id = "visitor_id";
  String client_type = "client_type";
  DateTime time = DateTimes.of("2016-03-04T00:00:00.000Z");
  long timestamp = time.getMillis();
  index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "0", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "1", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "2", client_type, "android")));
  TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .dimension(client_type)
      .metric("UV")
      .threshold(10)
      .aggregators(QueryRunnerTestHelper.ROWS_COUNT, new DistinctCountAggregatorFactory("UV", visitor_id, null))
      .build();
  final Iterable<Result<TopNResultValue>> results =
      engine.query(query, new IncrementalIndexStorageAdapter(index), null).toList();
  List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
      new Result<>(
          time,
          new TopNResultValue(
              Arrays.<Map<String, Object>>asList(
                  ImmutableMap.of(client_type, "iphone", "UV", 2L, "rows", 2L),
                  ImmutableMap.of(client_type, "android", "UV", 1L, "rows", 1L)
              )
          )
      )
  );
  TestHelper.assertExpectedResults(expectedResults, results);
}
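Here MapBasedInputRow is constructed through its epoch-millis overload, and values come back out through the InputRow interface. A minimal sketch reusing the test's variables:

InputRow row = new MapBasedInputRow(
    timestamp,
    Lists.newArrayList(visitor_id, client_type),
    ImmutableMap.of(visitor_id, "0", client_type, "iphone")
);
row.getTimestampFromEpoch();   // epoch millis of 2016-03-04T00:00:00.000Z
row.getDimension(client_type); // ["iphone"] (dimension values as a List<String>)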
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
The class DistinctCountGroupByQueryTest, method testGroupByWithDistinctCountAgg.
@Test
public void testGroupByWithDistinctCountAgg() throws Exception
{
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(
          new IncrementalIndexSchema.Builder()
              .withQueryGranularity(Granularities.SECOND)
              .withMetrics(new CountAggregatorFactory("cnt"))
              .build()
      )
      .setConcurrentEventAdd(true)
      .setMaxRowCount(1000)
      .build();
  String visitor_id = "visitor_id";
  String client_type = "client_type";
  long timestamp = DateTimes.of("2010-01-01").getMillis();
  index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "0", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp + 1, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "1", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp + 2, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "2", client_type, "android")));
  GroupByQuery query = new GroupByQuery.Builder()
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
      .setDimensions(new DefaultDimensionSpec(client_type, client_type))
      .setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .setLimitSpec(
          new DefaultLimitSpec(
              Collections.singletonList(new OrderByColumnSpec(client_type, OrderByColumnSpec.Direction.DESCENDING)),
              10
          )
      )
      .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new DistinctCountAggregatorFactory("UV", visitor_id, null))
      .build();
  final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
  Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, factory.createRunner(incrementalIndexSegment), query);
  List<ResultRow> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", client_type, "iphone", "UV", 2L, "rows", 2L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", client_type, "android", "UV", 1L, "rows", 1L)
  );
  TestHelper.assertExpectedObjects(expectedResults, results, "distinct-count");
}
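The three rows are added at timestamp, timestamp + 1, and timestamp + 2 milliseconds, but withQueryGranularity(Granularities.SECOND) truncates row times, so all three land in the same one-second bucket. A small sketch of that truncation, using the same classes as above:

// Both truncate to the same second-aligned bucket start, since
// 2010-01-01T00:00:00.000Z is itself second-aligned.
long bucket0 = Granularities.SECOND.bucketStart(DateTimes.utc(timestamp)).getMillis();
long bucket2 = Granularities.SECOND.bucketStart(DateTimes.utc(timestamp + 2)).getMillis();
// bucket0 == bucket2 == timestamp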
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
The class DistinctCountTimeseriesQueryTest, method testTimeseriesWithDistinctCountAgg.
@Test
public void testTimeseriesWithDistinctCountAgg() throws Exception
{
  TimeseriesQueryEngine engine = new TimeseriesQueryEngine();
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(
          new IncrementalIndexSchema.Builder()
              .withQueryGranularity(Granularities.SECOND)
              .withMetrics(new CountAggregatorFactory("cnt"))
              .build()
      )
      .setMaxRowCount(1000)
      .build();
  String visitor_id = "visitor_id";
  String client_type = "client_type";
  DateTime time = DateTimes.of("2016-03-04T00:00:00.000Z");
  long timestamp = time.getMillis();
  index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "0", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "1", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "2", client_type, "android")));
  TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .aggregators(Lists.newArrayList(QueryRunnerTestHelper.ROWS_COUNT, new DistinctCountAggregatorFactory("UV", visitor_id, null)))
      .build();
  final Iterable<Result<TimeseriesResultValue>> results =
      engine.process(query, new IncrementalIndexStorageAdapter(index)).toList();
  List<Result<TimeseriesResultValue>> expectedResults = Collections.singletonList(
      new Result<>(time, new TimeseriesResultValue(ImmutableMap.of("UV", 3, "rows", 3L)))
  );
  TestHelper.assertExpectedResults(expectedResults, results);
}
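With ALL granularity the engine returns a single result for the whole interval, so three rows carrying three distinct visitor_id values yield UV = 3 alongside rows = 3. A minimal sketch of inspecting that result:

final Result<TimeseriesResultValue> result = results.iterator().next();
result.getTimestamp(); // 2016-03-04T00:00:00.000Z, i.e. equal to time
result.getValue();     // holds the aggregated values {"UV": 3, "rows": 3L}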