Example 66 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

In class DistinctCountTopNQueryTest, the method testTopNWithDistinctCountAgg:

@Test
public void testTopNWithDistinctCountAgg() throws Exception {
    TopNQueryEngine engine = new TopNQueryEngine(new StupidPool<ByteBuffer>("TopNQueryEngine-bufferPool", new Supplier<ByteBuffer>() {

        @Override
        public ByteBuffer get() {
            return ByteBuffer.allocate(1024 * 1024);
        }
    }));
    IncrementalIndex index = new OnheapIncrementalIndex(0, Granularities.SECOND, new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, 1000);
    String visitor_id = "visitor_id";
    String client_type = "client_type";
    DateTime time = new DateTime("2016-03-04T00:00:00.000Z");
    long timestamp = time.getMillis();
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "0", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "1", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "2", client_type, "android")));
    // topN over client_type, ranked by the distinct-count "UV" aggregator
    TopNQuery query = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.dataSource)
        .granularity(QueryRunnerTestHelper.allGran)
        .intervals(QueryRunnerTestHelper.fullOnInterval)
        .dimension(client_type)
        .metric("UV")
        .threshold(10)
        .aggregators(Lists.newArrayList(QueryRunnerTestHelper.rowsCount, new DistinctCountAggregatorFactory("UV", visitor_id, null)))
        .build();
    final Iterable<Result<TopNResultValue>> results = Sequences.toList(
        engine.query(query, new IncrementalIndexStorageAdapter(index)),
        Lists.<Result<TopNResultValue>>newLinkedList()
    );
    // expect "iphone" first (2 distinct visitors, 2 rows), then "android" (1, 1)
    List<Result<TopNResultValue>> expectedResults = Arrays.asList(
        new Result<>(
            time,
            new TopNResultValue(
                Arrays.<Map<String, Object>>asList(
                    ImmutableMap.<String, Object>of(client_type, "iphone", "UV", 2L, "rows", 2L),
                    ImmutableMap.<String, Object>of(client_type, "android", "UV", 1L, "rows", 1L)
                )
            )
        )
    );
    TestHelper.assertExpectedResults(expectedResults, results);
}
Also used : TopNQueryBuilder(io.druid.query.topn.TopNQueryBuilder) TopNResultValue(io.druid.query.topn.TopNResultValue) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) ByteBuffer(java.nio.ByteBuffer) DateTime(org.joda.time.DateTime) TopNQueryEngine(io.druid.query.topn.TopNQueryEngine) Result(io.druid.query.Result) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexStorageAdapter(io.druid.segment.incremental.IncrementalIndexStorageAdapter) TopNQuery(io.druid.query.topn.TopNQuery) Supplier(com.google.common.base.Supplier) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Test(org.junit.Test)
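
A note on the row construction above: a MapBasedInputRow is just the event timestamp (epoch millis or a DateTime), the list of dimension names, and the raw event as a map, and the incremental index ingests it directly. A minimal sketch reusing the index built in this test; the visitor_id value "3" is made up for illustration:

    // timestamp in millis, dimension names, then the event itself as a map
    MapBasedInputRow extraRow = new MapBasedInputRow(
        new DateTime("2016-03-04T00:00:00.000Z").getMillis(),
        Lists.newArrayList("visitor_id", "client_type"),
        ImmutableMap.<String, Object>of("visitor_id", "3", "client_type", "android")
    );
    // "index" is the OnheapIncrementalIndex created at the top of the test
    index.add(extraRow);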

Example 67 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

In class ParquetHadoopInputRowParser, the method parse:

/**
   * Imitates the avro extension {@link AvroStreamInputRowParser#parseGenericRecord(GenericRecord, ParseSpec, List, boolean, boolean)}.
   */
@Override
public InputRow parse(GenericRecord record) {
    GenericRecordAsMap genericRecordAsMap = new GenericRecordAsMap(record, false, binaryAsString);
    TimestampSpec timestampSpec = parseSpec.getTimestampSpec();
    DateTime dateTime = timestampSpec.extractTimestamp(genericRecordAsMap);
    return new MapBasedInputRow(dateTime, dimensions, genericRecordAsMap);
}
Also used : GenericRecordAsMap(io.druid.data.input.avro.GenericRecordAsMap) TimestampSpec(io.druid.data.input.impl.TimestampSpec) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) DateTime(org.joda.time.DateTime)
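
The parser above boils down to two steps: let the TimestampSpec pull the timestamp out of the record-as-map view, then hand that same map to MapBasedInputRow. A minimal sketch with a plain map standing in for GenericRecordAsMap, assuming the (column, format, missing-value) TimestampSpec constructor and the "auto" timestamp format; the field names and values are illustrative:

    // stand-in for the GenericRecordAsMap view of a parquet/avro record
    Map<String, Object> event = ImmutableMap.<String, Object>of(
        "timestamp", "2016-03-04T00:00:00.000Z",
        "visitor_id", "0",
        "client_type", "iphone"
    );
    TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    DateTime dateTime = timestampSpec.extractTimestamp(event);    // reads the "timestamp" field
    InputRow row = new MapBasedInputRow(dateTime, Lists.newArrayList("visitor_id", "client_type"), event);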

Example 68 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

In class DruidOrcInputFormatTest, the method testRead:

@Test
public void testRead() throws IOException, InterruptedException {
    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    OrcHadoopInputRowParser parser = (OrcHadoopInputRowParser) config.getParser();
    reader.initialize(split, context);
    reader.nextKeyValue();
    OrcStruct data = (OrcStruct) reader.getCurrentValue();
    MapBasedInputRow row = (MapBasedInputRow) parser.parse(data);
    Assert.assertTrue(row.getEvent().keySet().size() == 4);
    Assert.assertEquals(new DateTime(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));
    reader.close();
}
Also used : OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) OrcNewInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat) InputFormat(org.apache.hadoop.mapreduce.InputFormat) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) RecordReader(org.apache.hadoop.mapreduce.RecordReader) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) DateTime(org.joda.time.DateTime) Test(org.junit.Test)
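
The assertions above exercise the main accessors of a parsed MapBasedInputRow. A small sketch of what each one returns, using illustrative data shaped like the rows from Example 66 rather than the ORC fixture:

    MapBasedInputRow row = new MapBasedInputRow(
        new DateTime("2016-03-04T00:00:00.000Z").getMillis(),
        Lists.newArrayList("visitor_id", "client_type"),
        ImmutableMap.<String, Object>of("visitor_id", "0", "client_type", "iphone")
    );
    row.getTimestamp();              // DateTime 2016-03-04T00:00:00.000Z
    row.getDimensions();             // [visitor_id, client_type]
    row.getEvent();                  // the full event map
    row.getDimension("client_type"); // [iphone] -- dimension values come back as a List<String>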

Example 69 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

In class ThriftInputRowParser, the method parse:

@Override
public InputRow parse(Object input) {
    // Placing this initialization in the constructor would cause a ClassNotFoundException
    try {
        if (thriftClass == null) {
            thriftClass = getThriftClass();
        }
    } catch (IOException e) {
        throw new IAE(e, "failed to load jar [%s]", jarPath);
    } catch (ClassNotFoundException e) {
        throw new IAE(e, "class [%s] not found in jar", thriftClassName);
    } catch (InstantiationException | IllegalAccessException e) {
        throw new IAE(e, "instantiation thrift instance failed");
    }
    final String json;
    try {
        if (input instanceof ByteBuffer) {
            // realtime stream
            final byte[] bytes = ((ByteBuffer) input).array();
            TBase o = thriftClass.newInstance();
            ThriftDeserialization.detectAndDeserialize(bytes, o);
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else if (input instanceof BytesWritable) {
            // sequence file
            final byte[] bytes = ((BytesWritable) input).getBytes();
            TBase o = thriftClass.newInstance();
            ThriftDeserialization.detectAndDeserialize(bytes, o);
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else if (input instanceof ThriftWritable) {
            // LzoBlockThrift file
            TBase o = (TBase) ((ThriftWritable) input).get();
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else {
            throw new IAE("unsupport input class of [%s]", input.getClass());
        }
    } catch (IllegalAccessException | InstantiationException | TException e) {
        throw new IAE("some thing wrong with your thrift?");
    }
    Map<String, Object> record = parser.parse(json);
    return new MapBasedInputRow(parseSpec.getTimestampSpec().extractTimestamp(record), parseSpec.getDimensionsSpec().getDimensionNames(), record);
}
Also used : TException(org.apache.thrift.TException) ThriftWritable(com.twitter.elephantbird.mapreduce.io.ThriftWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) IAE(com.metamx.common.IAE) ByteBuffer(java.nio.ByteBuffer) TBase(org.apache.thrift.TBase) MapBasedInputRow(io.druid.data.input.MapBasedInputRow)
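
Whichever thrift container arrives, the tail end of the parser is always the same: the JSON string is parsed into a Map, and the ParseSpec supplies the timestamp column and the dimension names. A minimal sketch of just that last step, with a hand-built map standing in for the deserialized thrift object; the "ts" column name and the field values are invented for illustration:

    Map<String, Object> record = ImmutableMap.<String, Object>of(
        "ts", "2016-03-04T00:00:00.000Z",
        "visitor_id", "0",
        "client_type", "iphone"
    );
    InputRow row = new MapBasedInputRow(
        parseSpec.getTimestampSpec().extractTimestamp(record),      // DateTime taken from the "ts" column
        parseSpec.getDimensionsSpec().getDimensionNames(),          // dimension list comes from the spec, not the record
        record
    );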

Example 70 with MapBasedInputRow

Use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

In class HadoopDruidIndexerConfigTest, the method testHashedBucketSelection:

@Test
public void testHashedBucketSelection() {
    List<HadoopyShardSpec> specs = Lists.newArrayList();
    final int partitionCount = 10;
    for (int i = 0; i < partitionCount; i++) {
        specs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, partitionCount, null, new DefaultObjectMapper()), i));
    }
    HadoopIngestionSpec spec = new HadoopIngestionSpec(
        new DataSchema(
            "foo",
            null,
            new AggregatorFactory[0],
            new UniformGranularitySpec(Granularities.MINUTE, Granularities.MINUTE, ImmutableList.of(new Interval("2010-01-01/P1D"))),
            jsonMapper
        ),
        new HadoopIOConfig(ImmutableMap.<String, Object>of("paths", "bar", "type", "static"), null, null),
        new HadoopTuningConfig(
            null, null, null,
            ImmutableMap.of(new DateTime("2010-01-01T01:00:00").getMillis(), specs),
            null, null, false, false, false, false, null, false, false, null, null, null, false, false
        )
    );
    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(spec);
    final List<String> dims = Arrays.asList("diM1", "dIM2");
    final ImmutableMap<String, Object> values = ImmutableMap.<String, Object>of("Dim1", "1", "DiM2", "2", "dim1", "3", "dim2", "4");
    final long timestamp = new DateTime("2010-01-01T01:00:01").getMillis();
    final Bucket expectedBucket = config.getBucket(new MapBasedInputRow(timestamp, dims, values)).get();
    final long nextBucketTimestamp = Granularities.MINUTE.bucketEnd(new DateTime(timestamp)).getMillis();
    // check that all rows having same set of dims and truncated timestamp hash to same bucket
    for (int i = 0; timestamp + i < nextBucketTimestamp; i++) {
        Assert.assertEquals(expectedBucket.partitionNum, config.getBucket(new MapBasedInputRow(timestamp + i, dims, values)).get().partitionNum);
    }
}
Also used : HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Interval(org.joda.time.Interval) Test(org.junit.Test)
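
The loop above is the whole point of the test: rows that share the same dimensions and fall into the same (minute-truncated) time bucket must hash to the same partition. A condensed restatement, reusing the test's config, dims, values and timestamp; the 30-second offset is arbitrary but still inside the same minute:

    Bucket first = config.getBucket(new MapBasedInputRow(timestamp, dims, values)).get();
    Bucket later = config.getBucket(new MapBasedInputRow(timestamp + 30000, dims, values)).get();  // 30s later, same minute bucket
    Assert.assertEquals(first.partitionNum, later.partitionNum);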

Aggregations

MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 73
Test (org.junit.Test): 51
DateTime (org.joda.time.DateTime): 38
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 32
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 30
File (java.io.File): 19
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 13
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 12
InputRow (io.druid.data.input.InputRow): 11
IncrementalIndexTest (io.druid.segment.data.IncrementalIndexTest): 11
Interval (org.joda.time.Interval): 11
IOException (java.io.IOException): 10
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 9
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 8
Row (io.druid.data.input.Row): 7
TaskStatus (io.druid.indexing.common.TaskStatus): 7
TaskToolbox (io.druid.indexing.common.TaskToolbox): 7
TestIndexerMetadataStorageCoordinator (io.druid.indexing.test.TestIndexerMetadataStorageCoordinator): 7
SpatialDimensionSchema (io.druid.data.input.impl.SpatialDimensionSchema): 6
Pair (io.druid.java.util.common.Pair): 6