
Example 31 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From class QueryableIndexColumnCapabilitiesTest, method setup.

@BeforeClass
public static void setup() throws IOException {
    MapInputRowParser parser = new MapInputRowParser(new TimeAndDimsParseSpec(
        new TimestampSpec("time", "auto", null),
        new DimensionsSpec(ImmutableList.<DimensionSchema>builder()
            .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("d1", "d2")))
            .add(new DoubleDimensionSchema("d3"))
            .add(new FloatDimensionSchema("d4"))
            .add(new LongDimensionSchema("d5"))
            .build())
    ));
    AggregatorFactory[] metricsSpecs = new AggregatorFactory[] {
        new CountAggregatorFactory("cnt"),
        new DoubleSumAggregatorFactory("m1", "d3"),
        new FloatSumAggregatorFactory("m2", "d4"),
        new LongSumAggregatorFactory("m3", "d5"),
        new HyperUniquesAggregatorFactory("m4", "d1")
    };
    List<InputRow> rows = new ArrayList<>();
    Map<String, Object> event = ImmutableMap.<String, Object>builder()
        .put("time", DateTimes.nowUtc().getMillis())
        .put("d1", "some string")
        .put("d2", ImmutableList.of("some", "list"))
        .put("d3", 1.234)
        .put("d4", 1.234f)
        .put("d5", 10L)
        .build();
    rows.add(Iterables.getOnlyElement(parser.parseBatch(event)));
    IndexBuilder builder = IndexBuilder.create()
        .rows(rows)
        .schema(new IncrementalIndexSchema.Builder()
            .withMetrics(metricsSpecs)
            .withDimensionsSpec(parser)
            .withRollup(false)
            .build())
        .tmpDir(temporaryFolder.newFolder());
    INC_INDEX = builder.buildIncrementalIndex();
    MMAP_INDEX = builder.buildMMappedIndex();
    List<InputRow> rowsWithNulls = new ArrayList<>();
    rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(event)));
    Map<String, Object> eventWithNulls = new HashMap<>();
    eventWithNulls.put("time", DateTimes.nowUtc().getMillis());
    eventWithNulls.put("d1", null);
    eventWithNulls.put("d2", ImmutableList.of());
    eventWithNulls.put("d3", null);
    eventWithNulls.put("d4", null);
    eventWithNulls.put("d5", null);
    rowsWithNulls.add(Iterables.getOnlyElement(parser.parseBatch(eventWithNulls)));
    IndexBuilder builderWithNulls = IndexBuilder.create()
        .rows(rowsWithNulls)
        .schema(new IncrementalIndexSchema.Builder()
            .withMetrics(metricsSpecs)
            .withDimensionsSpec(parser)
            .withRollup(false)
            .build())
        .tmpDir(temporaryFolder.newFolder());
    INC_INDEX_WITH_NULLS = builderWithNulls.buildIncrementalIndex();
    MMAP_INDEX_WITH_NULLS = builderWithNulls.buildMMappedIndex();
}
Also used : MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) HashMap(java.util.HashMap) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) ArrayList(java.util.ArrayList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) BeforeClass(org.junit.BeforeClass)
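
A minimal sketch (not part of the Druid test itself) of what the InputRow parsed from the event above exposes, assuming the MapInputRowParser and the d1..d5 fields configured in setup():

// Hypothetical follow-up to setup(): inspect the single InputRow parsed from "event".
InputRow row = Iterables.getOnlyElement(parser.parseBatch(event));
// The timestamp comes from the "time" field named in the TimestampSpec.
DateTime timestamp = row.getTimestamp();
// String and multi-value dimensions come back as lists of strings.
List<String> d1 = row.getDimension("d1");   // ["some string"]
List<String> d2 = row.getDimension("d2");   // ["some", "list"]
// Numeric fields can be read as metrics (numbers) or as raw objects.
Number d5 = row.getMetric("d5");            // 10
Object d3 = row.getRaw("d3");               // 1.234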

Example 32 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From class IncrementalIndex, method makeColumnSelectorFactory.

/**
 * Column selector used at ingestion time for inputs to aggregators.
 *
 * @param agg                       the aggregator
 * @param in                        ingestion-time input row supplier
 * @param deserializeComplexMetrics whether complex objects should be deserialized by a {@link ComplexMetricExtractor}
 *
 * @return column selector factory
 */
public static ColumnSelectorFactory makeColumnSelectorFactory(
    final VirtualColumns virtualColumns,
    final AggregatorFactory agg,
    final Supplier<InputRow> in,
    final boolean deserializeComplexMetrics
) {
    // we use RowSignature.empty() because ColumnInspector here should be the InputRow schema, not the
    // IncrementalIndex schema, because we are reading values from the InputRow
    final RowBasedColumnSelectorFactory<InputRow> baseSelectorFactory = RowBasedColumnSelectorFactory.create(RowAdapters.standardRow(), in::get, RowSignature.empty(), true);
    class IncrementalIndexInputRowColumnSelectorFactory implements ColumnSelectorFactory {

        @Override
        public ColumnValueSelector<?> makeColumnValueSelector(final String column) {
            final boolean isComplexMetric = agg.getIntermediateType().is(ValueType.COMPLEX);
            final ColumnValueSelector selector = baseSelectorFactory.makeColumnValueSelector(column);
            if (!isComplexMetric || !deserializeComplexMetrics) {
                return selector;
            } else {
                // Wrap selector in a special one that uses ComplexMetricSerde to modify incoming objects.
                // For complex aggregators that read from multiple columns, we wrap all of them. This is not ideal but it
                // has worked so far.
                final String complexTypeName = agg.getIntermediateType().getComplexTypeName();
                final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(complexTypeName);
                if (serde == null) {
                    throw new ISE("Don't know how to handle type[%s]", complexTypeName);
                }
                final ComplexMetricExtractor extractor = serde.getExtractor();
                return new ColumnValueSelector() {

                    @Override
                    public boolean isNull() {
                        return selector.isNull();
                    }

                    @Override
                    public long getLong() {
                        return selector.getLong();
                    }

                    @Override
                    public float getFloat() {
                        return selector.getFloat();
                    }

                    @Override
                    public double getDouble() {
                        return selector.getDouble();
                    }

                    @Override
                    public Class classOfObject() {
                        return extractor.extractedClass();
                    }

                    @Nullable
                    @Override
                    public Object getObject() {
                        // Here is where the magic happens: read from "in" directly, don't go through the normal "selector".
                        return extractor.extractValue(in.get(), column, agg);
                    }

                    @Override
                    public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                        inspector.visit("in", in);
                        inspector.visit("selector", selector);
                        inspector.visit("extractor", extractor);
                    }
                };
            }
        }

        @Override
        public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) {
            return baseSelectorFactory.makeDimensionSelector(dimensionSpec);
        }

        @Nullable
        @Override
        public ColumnCapabilities getColumnCapabilities(String columnName) {
            return baseSelectorFactory.getColumnCapabilities(columnName);
        }
    }
    return virtualColumns.wrap(new IncrementalIndexInputRowColumnSelectorFactory());
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ComplexMetricSerde(org.apache.druid.segment.serde.ComplexMetricSerde) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) ComplexMetricExtractor(org.apache.druid.segment.serde.ComplexMetricExtractor) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) InputRow(org.apache.druid.data.input.InputRow) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) ISE(org.apache.druid.java.util.common.ISE) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) NilColumnValueSelector(org.apache.druid.segment.NilColumnValueSelector)
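
A rough, hedged sketch (not code from IncrementalIndex itself) of how such a factory is used at ingestion time: an AggregatorFactory is factorized against the returned ColumnSelectorFactory, and the Supplier<InputRow> is pointed at each incoming row before aggregate() is called. The rowHolder array and rows list below are hypothetical stand-ins.

// Sketch only: drive one aggregator from InputRows via the factory above.
final InputRow[] rowHolder = new InputRow[1];
final List<InputRow> rows = ImmutableList.of();  // stand-in for rows parsed upstream
AggregatorFactory countFactory = new CountAggregatorFactory("cnt");
ColumnSelectorFactory csf = IncrementalIndex.makeColumnSelectorFactory(
    VirtualColumns.EMPTY,
    countFactory,
    () -> rowHolder[0],            // the Supplier<InputRow> the selectors read from
    true
);
Aggregator counter = countFactory.factorize(csf);
for (InputRow row : rows) {
    rowHolder[0] = row;            // repoint the supplier at the current row
    counter.aggregate();           // selectors resolve against the current row
}
Object count = counter.get();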

Example 33 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From class ProtobufReader, method parseInputRows.

@Override
protected List<InputRow> parseInputRows(DynamicMessage intermediateRow) throws ParseException, JsonProcessingException {
    Map<String, Object> record;
    if (flattenSpec == null || JSONPathSpec.DEFAULT.equals(flattenSpec)) {
        try {
            record = CollectionUtils.mapKeys(intermediateRow.getAllFields(), k -> k.getJsonName());
        } catch (Exception ex) {
            throw new ParseException(null, ex, "Protobuf message could not be parsed");
        }
    } else {
        try {
            String json = JsonFormat.printer().print(intermediateRow);
            record = recordFlattener.flatten(OBJECT_MAPPER.readValue(json, JsonNode.class));
        } catch (InvalidProtocolBufferException e) {
            throw new ParseException(null, e, "Protobuf message could not be parsed");
        }
    }
    return Collections.singletonList(MapInputRowParser.parse(inputRowSchema, record));
}
Also used : DynamicMessage(com.google.protobuf.DynamicMessage) ParseException(org.apache.druid.java.util.common.parsers.ParseException) ObjectFlattener(org.apache.druid.java.util.common.parsers.ObjectFlattener) CollectionUtils(org.apache.druid.utils.CollectionUtils) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Iterators(com.google.common.collect.Iterators) ByteBuffer(java.nio.ByteBuffer) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) Map(java.util.Map) JsonNode(com.fasterxml.jackson.databind.JsonNode) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) JSONFlattenerMaker(org.apache.druid.java.util.common.parsers.JSONFlattenerMaker) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) IOUtils(org.apache.commons.io.IOUtils) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) IntermediateRowParsingReader(org.apache.druid.data.input.IntermediateRowParsingReader) CloseableIterators(org.apache.druid.java.util.common.CloseableIterators) JsonFormat(com.google.protobuf.util.JsonFormat) ObjectFlatteners(org.apache.druid.java.util.common.parsers.ObjectFlatteners) InputEntity(org.apache.druid.data.input.InputEntity) Collections(java.util.Collections)
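
For comparison, a minimal sketch of the MapInputRowParser.parse(...) call used above, applied to a hand-built record map; the "timestamp" field and "page" dimension here are hypothetical:

// Sketch only: turn a flattened record map into an InputRow, as parseInputRows does.
InputRowSchema schema = new InputRowSchema(
    new TimestampSpec("timestamp", "iso", null),
    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.singletonList("page"))),
    null
);
Map<String, Object> record = new HashMap<>();
record.put("timestamp", "2012-07-12T09:30:00Z");
record.put("page", "some-page");
InputRow row = MapInputRowParser.parse(schema, record);  // throws ParseException on bad input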

Example 34 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From class ProtobufInputFormatTest, method testParseNestedData.

@Test
public void testParseNestedData() throws Exception {
    // configure parser with desc file
    ProtobufInputFormat protobufInputFormat = new ProtobufInputFormat(flattenSpec, decoder);
    // create binary of proto test event
    DateTime dateTime = new DateTime(2012, 7, 12, 9, 30, ISOChronology.getInstanceUTC());
    ProtoTestEventWrapper.ProtoTestEvent event = ProtobufInputRowParserTest.buildNestedData(dateTime);
    final ByteEntity entity = new ByteEntity(ProtobufInputRowParserTest.toByteBuffer(event));
    InputRow row = protobufInputFormat.createReader(new InputRowSchema(timestampSpec, dimensionsSpec, null), entity, null).read().next();
    ProtobufInputRowParserTest.verifyNestedData(row, dateTime);
}
Also used : ByteEntity(org.apache.druid.data.input.impl.ByteEntity) InputRow(org.apache.druid.data.input.InputRow) InputRowSchema(org.apache.druid.data.input.InputRowSchema) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Example 35 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From class ProtobufInputFormatTest, method testParseFlatData.

@Test
public void testParseFlatData() throws Exception {
    // configure parser with desc file
    ProtobufInputFormat protobufInputFormat = new ProtobufInputFormat(null, decoder);
    // create binary of proto test event
    DateTime dateTime = new DateTime(2012, 7, 12, 9, 30, ISOChronology.getInstanceUTC());
    ProtoTestEventWrapper.ProtoTestEvent event = ProtobufInputRowParserTest.buildFlatData(dateTime);
    final ByteEntity entity = new ByteEntity(ProtobufInputRowParserTest.toByteBuffer(event));
    InputRow row = protobufInputFormat.createReader(new InputRowSchema(timestampSpec, dimensionsSpec, null), entity, null).read().next();
    ProtobufInputRowParserTest.verifyFlatData(row, dateTime);
}
Also used : ByteEntity(org.apache.druid.data.input.impl.ByteEntity) InputRow(org.apache.druid.data.input.InputRow) InputRowSchema(org.apache.druid.data.input.InputRowSchema) DateTime(org.joda.time.DateTime) Test(org.junit.Test)
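
The two tests above pull a single row with read().next(). As a hedged sketch, the same reader can also be drained with try-with-resources, since read() returns a CloseableIterator<InputRow>:

// Sketch only: consume every InputRow produced by the reader, then close it.
InputEntityReader reader = protobufInputFormat.createReader(
    new InputRowSchema(timestampSpec, dimensionsSpec, null), entity, null);
try (CloseableIterator<InputRow> iterator = reader.read()) {
    while (iterator.hasNext()) {
        InputRow row = iterator.next();
        List<String> dims = row.getDimensions();  // e.g. inspect the parsed dimensions
    }
}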

Aggregations

InputRow (org.apache.druid.data.input.InputRow) 266
Test (org.junit.Test) 193
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow) 57
InputEntityReader (org.apache.druid.data.input.InputEntityReader) 54
InputRowSchema (org.apache.druid.data.input.InputRowSchema) 52
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec) 52
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) 49
ArrayList (java.util.ArrayList) 46
List (java.util.List) 37
ImmutableList (com.google.common.collect.ImmutableList) 33
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec) 33
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 33
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues) 29
File (java.io.File) 27
HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig) 27
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) 27
DateTime (org.joda.time.DateTime) 24
Map (java.util.Map) 23
IOException (java.io.IOException) 18
Interval (org.joda.time.Interval) 18