Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In the class DistinctCountTopNQueryTest, the method testTopNWithDistinctCountAgg:
@Test
public void testTopNWithDistinctCountAgg() throws Exception
{
  TopNQueryEngine engine = new TopNQueryEngine(
      new StupidPool<ByteBuffer>(
          "TopNQueryEngine-bufferPool",
          new Supplier<ByteBuffer>()
          {
            @Override
            public ByteBuffer get()
            {
              return ByteBuffer.allocate(1024 * 1024);
            }
          }
      )
  );
  IncrementalIndex index = new OnheapIncrementalIndex(
      0, Granularities.SECOND, new AggregatorFactory[]{new CountAggregatorFactory("cnt")}, 1000
  );
  String visitor_id = "visitor_id";
  String client_type = "client_type";
  DateTime time = new DateTime("2016-03-04T00:00:00.000Z");
  long timestamp = time.getMillis();
  index.add(new MapBasedInputRow(
      timestamp,
      Lists.newArrayList(visitor_id, client_type),
      ImmutableMap.<String, Object>of(visitor_id, "0", client_type, "iphone")
  ));
  index.add(new MapBasedInputRow(
      timestamp,
      Lists.newArrayList(visitor_id, client_type),
      ImmutableMap.<String, Object>of(visitor_id, "1", client_type, "iphone")
  ));
  index.add(new MapBasedInputRow(
      timestamp,
      Lists.newArrayList(visitor_id, client_type),
      ImmutableMap.<String, Object>of(visitor_id, "2", client_type, "android")
  ));
  TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .intervals(QueryRunnerTestHelper.fullOnInterval)
      .dimension(client_type)
      .metric("UV")
      .threshold(10)
      .aggregators(Lists.newArrayList(
          QueryRunnerTestHelper.rowsCount,
          new DistinctCountAggregatorFactory("UV", visitor_id, null)
      ))
      .build();
  final Iterable<Result<TopNResultValue>> results = Sequences.toList(
      engine.query(query, new IncrementalIndexStorageAdapter(index)),
      Lists.<Result<TopNResultValue>>newLinkedList()
  );
  List<Result<TopNResultValue>> expectedResults = Arrays.asList(
      new Result<>(
          time,
          new TopNResultValue(Arrays.<Map<String, Object>>asList(
              ImmutableMap.<String, Object>of(client_type, "iphone", "UV", 2L, "rows", 2L),
              ImmutableMap.<String, Object>of(client_type, "android", "UV", 1L, "rows", 1L)
          ))
      )
  );
  TestHelper.assertExpectedResults(expectedResults, results);
}
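For orientation, a MapBasedInputRow only needs a timestamp (epoch millis, as above, or a DateTime), the list of dimension names, and the event map; values can then be read back through the Row accessors that the tests in this listing also exercise. A minimal standalone sketch (illustrative, not taken from the Druid source):

  // Minimal sketch: construct a row directly and read it back through its accessors.
  MapBasedInputRow row = new MapBasedInputRow(
      new DateTime("2016-03-04T00:00:00.000Z").getMillis(),
      Lists.newArrayList("visitor_id", "client_type"),
      ImmutableMap.<String, Object>of("visitor_id", "0", "client_type", "iphone")
  );
  DateTime rowTime = row.getTimestamp();                      // 2016-03-04T00:00:00.000Z
  List<String> clientType = row.getDimension("client_type");  // ["iphone"]
  Map<String, Object> event = row.getEvent();                 // the underlying event map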
Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In the class ParquetHadoopInputRowParser, the method parse:
/**
 * Imitates the avro extension {@link AvroStreamInputRowParser#parseGenericRecord(GenericRecord, ParseSpec, List, boolean, boolean)}.
 */
@Override
public InputRow parse(GenericRecord record)
{
  GenericRecordAsMap genericRecordAsMap = new GenericRecordAsMap(record, false, binaryAsString);
  TimestampSpec timestampSpec = parseSpec.getTimestampSpec();
  DateTime dateTime = timestampSpec.extractTimestamp(genericRecordAsMap);
  return new MapBasedInputRow(dateTime, dimensions, genericRecordAsMap);
}
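The same two-step pattern, extracting the timestamp via the parse spec's TimestampSpec and then wrapping the record in a MapBasedInputRow, works against any Map<String, Object>. A minimal sketch, assuming a hypothetical epoch-millis column named "ts" (the column name and "millis" format are assumptions for the example, not part of the parser above):

  // Minimal sketch (illustrative): an event map with an assumed "ts" column.
  Map<String, Object> event = ImmutableMap.<String, Object>of(
      "ts", 1457049600000L,  // 2016-03-04T00:00:00.000Z
      "visitor_id", "0",
      "client_type", "iphone"
  );
  TimestampSpec timestampSpec = new TimestampSpec("ts", "millis", null);
  DateTime dateTime = timestampSpec.extractTimestamp(event);
  InputRow row = new MapBasedInputRow(dateTime, Lists.newArrayList("visitor_id", "client_type"), event);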
Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In the class DruidOrcInputFormatTest, the method testRead:
@Test
public void testRead() throws IOException, InterruptedException
{
  InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  RecordReader reader = inputFormat.createRecordReader(split, context);
  OrcHadoopInputRowParser parser = (OrcHadoopInputRowParser) config.getParser();
  reader.initialize(split, context);
  reader.nextKeyValue();
  OrcStruct data = (OrcStruct) reader.getCurrentValue();
  MapBasedInputRow row = (MapBasedInputRow) parser.parse(data);
  Assert.assertTrue(row.getEvent().keySet().size() == 4);
  Assert.assertEquals(new DateTime(timestamp), row.getTimestamp());
  Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
  Assert.assertEquals(col1, row.getEvent().get("col1"));
  Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));
  reader.close();
}
Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In the class ThriftInputRowParser, the method parse:
@Override
public InputRow parse(Object input)
{
  // Placing this initialization in the constructor would cause a ClassNotFoundException.
  try {
    if (thriftClass == null) {
      thriftClass = getThriftClass();
    }
  } catch (IOException e) {
    throw new IAE(e, "failed to load jar [%s]", jarPath);
  } catch (ClassNotFoundException e) {
    throw new IAE(e, "class [%s] not found in jar", thriftClassName);
  } catch (InstantiationException | IllegalAccessException e) {
    throw new IAE(e, "failed to instantiate thrift instance");
  }
  final String json;
  try {
    if (input instanceof ByteBuffer) {
      // realtime stream
      final byte[] bytes = ((ByteBuffer) input).array();
      TBase o = thriftClass.newInstance();
      ThriftDeserialization.detectAndDeserialize(bytes, o);
      json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
    } else if (input instanceof BytesWritable) {
      // sequence file
      final byte[] bytes = ((BytesWritable) input).getBytes();
      TBase o = thriftClass.newInstance();
      ThriftDeserialization.detectAndDeserialize(bytes, o);
      json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
    } else if (input instanceof ThriftWritable) {
      // LZO block thrift file
      TBase o = (TBase) ((ThriftWritable) input).get();
      json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
    } else {
      throw new IAE("unsupported input class [%s]", input.getClass());
    }
  } catch (IllegalAccessException | InstantiationException | TException e) {
    throw new IAE(e, "failed to deserialize thrift input");
  }
  Map<String, Object> record = parser.parse(json);
  return new MapBasedInputRow(
      parseSpec.getTimestampSpec().extractTimestamp(record),
      parseSpec.getDimensionsSpec().getDimensionNames(),
      record
  );
}
Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.
In the class HadoopDruidIndexerConfigTest, the method testHashedBucketSelection:
@Test
public void testHashedBucketSelection()
{
  List<HadoopyShardSpec> specs = Lists.newArrayList();
  final int partitionCount = 10;
  for (int i = 0; i < partitionCount; i++) {
    specs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, partitionCount, null, new DefaultObjectMapper()), i));
  }
  HadoopIngestionSpec spec = new HadoopIngestionSpec(
      new DataSchema(
          "foo",
          null,
          new AggregatorFactory[0],
          new UniformGranularitySpec(Granularities.MINUTE, Granularities.MINUTE, ImmutableList.of(new Interval("2010-01-01/P1D"))),
          jsonMapper
      ),
      new HadoopIOConfig(ImmutableMap.<String, Object>of("paths", "bar", "type", "static"), null, null),
      new HadoopTuningConfig(
          null, null, null,
          ImmutableMap.of(new DateTime("2010-01-01T01:00:00").getMillis(), specs),
          null, null, false, false, false, false, null, false, false, null, null, null, false, false
      )
  );
  HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(spec);
  final List<String> dims = Arrays.asList("diM1", "dIM2");
  final ImmutableMap<String, Object> values = ImmutableMap.<String, Object>of("Dim1", "1", "DiM2", "2", "dim1", "3", "dim2", "4");
  final long timestamp = new DateTime("2010-01-01T01:00:01").getMillis();
  final Bucket expectedBucket = config.getBucket(new MapBasedInputRow(timestamp, dims, values)).get();
  final long nextBucketTimestamp = Granularities.MINUTE.bucketEnd(new DateTime(timestamp)).getMillis();
  // check that all rows with the same set of dims and the same truncated timestamp hash to the same bucket
  for (int i = 0; timestamp + i < nextBucketTimestamp; i++) {
    Assert.assertEquals(
        expectedBucket.partitionNum,
        config.getBucket(new MapBasedInputRow(timestamp + i, dims, values)).get().partitionNum
    );
  }
}