Example 61 with DimensionSpec

Use of io.druid.query.dimension.DimensionSpec in project hive by apache.

The class DruidSerDe, method inferSchema.

/* GroupBy query */
private void inferSchema(GroupByQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes) {
    // Timestamp column
    columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
    columnTypes.add(TypeInfoFactory.timestampTypeInfo);
    // Dimension columns
    for (DimensionSpec ds : query.getDimensions()) {
        columnNames.add(ds.getOutputName());
        columnTypes.add(TypeInfoFactory.stringTypeInfo);
    }
    // Aggregator columns
    for (AggregatorFactory af : query.getAggregatorSpecs()) {
        columnNames.add(af.getName());
        columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName()));
    }
    // Post-aggregator columns (always inferred as float)
    for (PostAggregator pa : query.getPostAggregatorSpecs()) {
        columnNames.add(pa.getName());
        columnTypes.add(TypeInfoFactory.floatTypeInfo);
    }
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
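
A minimal sketch of the schema this method produces, assuming only hive-serde on the classpath; the query shape (one dimension "country", one long-typed aggregator "clicks", one post-aggregator "ctr") and the class name are illustrative, not taken from the Hive source:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class InferSchemaSketch {
    public static void main(String[] args) {
        List<String> columnNames = new ArrayList<>();
        List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
        // 1) The timestamp column always comes first ("__time" mirrors
        //    DruidTable.DEFAULT_TIMESTAMP_COLUMN)
        columnNames.add("__time");
        columnTypes.add(TypeInfoFactory.timestampTypeInfo);
        // 2) Dimensions are always typed as string
        columnNames.add("country");
        columnTypes.add(TypeInfoFactory.stringTypeInfo);
        // 3) Aggregators keep their Druid type (a long sum maps to bigint)
        columnNames.add("clicks");
        columnTypes.add(TypeInfoFactory.longTypeInfo);
        // 4) Post-aggregators are always inferred as float
        columnNames.add("ctr");
        columnTypes.add(TypeInfoFactory.floatTypeInfo);
        System.out.println(columnNames + " -> " + columnTypes);
    }
}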

Example 62 with DimensionSpec

Use of io.druid.query.dimension.DimensionSpec in project hive by apache.

The class DruidSerDe, method inferSchema.

/* Select query */
private void inferSchema(SelectQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes, String address) throws SerDeException {
    // Timestamp column
    columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
    columnTypes.add(TypeInfoFactory.timestampTypeInfo);
    // Dimension columns
    for (DimensionSpec ds : query.getDimensions()) {
        columnNames.add(ds.getOutputName());
        columnTypes.add(TypeInfoFactory.stringTypeInfo);
    }
    // The types of the metric columns are not explicit in the query, so we
    // need to emit a segment metadata query to discover them
    SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder();
    builder.dataSource(query.getDataSource());
    builder.merge(true);
    builder.analysisTypes();
    SegmentMetadataQuery metadataQuery = builder.build();
    // Execute query in Druid
    SegmentAnalysis schemaInfo;
    try {
        schemaInfo = submitMetadataRequest(address, metadataQuery);
    } catch (IOException e) {
        throw new SerDeException(e);
    }
    if (schemaInfo == null) {
        throw new SerDeException("Connected to Druid but could not retrieve datasource information");
    }
    for (String metric : query.getMetrics()) {
        columnNames.add(metric);
        columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(schemaInfo.getColumns().get(metric).getType()));
    }
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) SegmentMetadataQuery(io.druid.query.metadata.metadata.SegmentMetadataQuery) SegmentMetadataQueryBuilder(io.druid.query.Druids.SegmentMetadataQueryBuilder) SegmentAnalysis(io.druid.query.metadata.metadata.SegmentAnalysis) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
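
submitMetadataRequest is not part of this excerpt; the real implementation goes through the Metamx HttpClient. The following is a hypothetical stand-in using plain HttpURLConnection, assuming a Druid-aware ObjectMapper so the query serializes with its queryType field; /druid/v2/ is Druid's standard query endpoint:

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.query.metadata.metadata.SegmentAnalysis;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;

public class MetadataRequestSketch {
    // Hypothetical stand-in for DruidSerDe#submitMetadataRequest
    static SegmentAnalysis submitMetadataRequest(ObjectMapper mapper, String address, SegmentMetadataQuery query) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL("http://" + address + "/druid/v2/").openConnection();
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/json");
        conn.setDoOutput(true);
        try (OutputStream out = conn.getOutputStream()) {
            mapper.writeValue(out, query);
        }
        try (InputStream in = conn.getInputStream()) {
            // Druid answers with a JSON array of SegmentAnalysis objects;
            // with merge(true) it contains at most one element
            SegmentAnalysis[] results = mapper.readValue(in, SegmentAnalysis[].class);
            return results.length == 0 ? null : results[0];
        }
    }
}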

Example 63 with DimensionSpec

Use of io.druid.query.dimension.DimensionSpec in project hive by apache.

The class DruidGroupByQueryRecordReader, method initDimensionTypes.

private void initDimensionTypes() throws IOException {
    // @TODO move this out of here to org.apache.hadoop.hive.druid.serde.DruidSerDe
    List<DimensionSpec> dimensionSpecList = ((GroupByQuery) query).getDimensions();
    List<DimensionSpec> extractionDimensionSpecList = dimensionSpecList.stream().filter(dimensionSpecs -> dimensionSpecs instanceof ExtractionDimensionSpec).collect(Collectors.toList());
    extractionDimensionSpecList.forEach(dimensionSpec -> {
        ExtractionDimensionSpec extractionDimensionSpec = (ExtractionDimensionSpec) dimensionSpec;
        if (extractionDimensionSpec.getExtractionFn() instanceof TimeFormatExtractionFn) {
            final TimeFormatExtractionFn timeFormatExtractionFn = (TimeFormatExtractionFn) extractionDimensionSpec.getExtractionFn();
            // The instanceof check above guarantees a non-null fn; the meaningful
            // null case is the format, which means Druid's default ISO output
            if (timeFormatExtractionFn.getFormat() == null || timeFormatExtractionFn.getFormat().equals(ISO_TIME_FORMAT)) {
                timeExtractionFields.add(extractionDimensionSpec.getOutputName());
            } else {
                intFormattedTimeExtractionFields.add(extractionDimensionSpec.getOutputName());
            }
        }
    });
}
Also used : ISODateTimeFormat(org.joda.time.format.ISODateTimeFormat) NullWritable(org.apache.hadoop.io.NullWritable) InputSplit(org.apache.hadoop.mapreduce.InputSplit) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DimensionSpec(io.druid.query.dimension.DimensionSpec) IOException(java.io.IOException) GroupByQuery(io.druid.query.groupby.GroupByQuery) Collectors(java.util.stream.Collectors) Maps(com.google.common.collect.Maps) List(java.util.List) Lists(com.google.common.collect.Lists) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) ISO_TIME_FORMAT(org.apache.hadoop.hive.druid.serde.DruidSerDeUtils.ISO_TIME_FORMAT) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) JavaType(com.fasterxml.jackson.databind.JavaType) DruidStorageHandlerUtils(org.apache.hadoop.hive.druid.DruidStorageHandlerUtils) TypeReference(com.fasterxml.jackson.core.type.TypeReference) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) HttpClient(com.metamx.http.client.HttpClient)
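
The classification rule is easier to see in isolation. In the sketch below, plain (output name, format) string pairs stand in for ExtractionDimensionSpecs whose extraction function is a TimeFormatExtractionFn; the ISO_TIME_FORMAT value is an assumption, the real constant lives in org.apache.hadoop.hive.druid.serde.DruidSerDeUtils:

import java.util.ArrayList;
import java.util.List;

public class TimeColumnClassificationSketch {
    // Assumed value; mirrors DruidSerDeUtils.ISO_TIME_FORMAT
    static final String ISO_TIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";

    public static void main(String[] args) {
        List<String> timeExtractionFields = new ArrayList<>();
        List<String> intFormattedTimeExtractionFields = new ArrayList<>();
        // (outputName, format) pairs standing in for the extraction specs;
        // a null format means Druid's default (ISO) output
        String[][] specs = {
            {"iso_time", null},
            {"year_bucket", "yyyy"},
        };
        for (String[] spec : specs) {
            String outputName = spec[0], format = spec[1];
            if (format == null || format.equals(ISO_TIME_FORMAT)) {
                timeExtractionFields.add(outputName);             // read back as full timestamps
            } else {
                intFormattedTimeExtractionFields.add(outputName); // read back as integers
            }
        }
        System.out.println("time: " + timeExtractionFields);
        System.out.println("int:  " + intFormattedTimeExtractionFields);
    }
}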

Example 64 with DimensionSpec

Use of io.druid.query.dimension.DimensionSpec in project hive by apache.

The class DruidGroupByQueryRecordReader, method next.

@Override
public boolean next(NullWritable key, DruidWritable value) {
    if (nextKeyValue()) {
        // Update value
        value.getValue().clear();
        // 1) The timestamp column
        value.getValue().put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis());
        // 2) The dimension columns
        for (int i = 0; i < query.getDimensions().size(); i++) {
            DimensionSpec ds = query.getDimensions().get(i);
            List<String> dims = current.getDimension(ds.getDimension());
            if (dims.size() == 0) {
                // NULL value for dimension
                value.getValue().put(ds.getOutputName(), null);
            } else {
                int pos = dims.size() - indexes[i] - 1;
                value.getValue().put(ds.getOutputName(), dims.get(pos));
            }
        }
        int counter = 0;
        // 3) The aggregation columns
        for (AggregatorFactory af : query.getAggregatorSpecs()) {
            switch(extractors[counter++]) {
                case FLOAT:
                    value.getValue().put(af.getName(), current.getFloatMetric(af.getName()));
                    break;
                case LONG:
                    value.getValue().put(af.getName(), current.getLongMetric(af.getName()));
                    break;
            }
        }
        // 4) The post-aggregation columns
        for (PostAggregator pa : query.getPostAggregatorSpecs()) {
            assert extractors[counter++] == Extract.FLOAT;
            value.getValue().put(pa.getName(), current.getFloatMetric(pa.getName()));
        }
        return true;
    }
    return false;
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
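
The position arithmetic dims.size() - indexes[i] - 1 is what fans a single Druid row with a multi-valued dimension out into several Hive rows. How indexes is maintained is not shown in this excerpt; the sketch below assumes nextKeyValue() counts each entry down from dims.size() - 1 to 0, which makes pos walk the values front to back:

import java.util.Arrays;
import java.util.List;

public class MultiValuePositionSketch {
    public static void main(String[] args) {
        // One Druid row whose dimension holds three values
        List<String> dims = Arrays.asList("a", "b", "c");
        // Assumed countdown performed elsewhere (not part of the excerpt)
        for (int index = dims.size() - 1; index >= 0; index--) {
            int pos = dims.size() - index - 1;
            System.out.println("indexes[i] = " + index + " -> " + dims.get(pos));
        }
    }
}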

Example 65 with DimensionSpec

Use of io.druid.query.dimension.DimensionSpec in project hive by apache.

The class DruidGroupByQueryRecordReader, method getCurrentValue.

@Override
public DruidWritable getCurrentValue() throws IOException, InterruptedException {
    // Create new value
    DruidWritable value = new DruidWritable();
    // 1) The timestamp column
    value.getValue().put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis());
    // 2) The dimension columns
    for (int i = 0; i < query.getDimensions().size(); i++) {
        DimensionSpec ds = query.getDimensions().get(i);
        List<String> dims = current.getDimension(ds.getDimension());
        if (dims.size() == 0) {
            // NULL value for dimension
            value.getValue().put(ds.getOutputName(), null);
        } else {
            int pos = dims.size() - indexes[i] - 1;
            value.getValue().put(ds.getOutputName(), dims.get(pos));
        }
    }
    int counter = 0;
    // 3) The aggregation columns
    for (AggregatorFactory af : query.getAggregatorSpecs()) {
        switch(extractors[counter++]) {
            case FLOAT:
                value.getValue().put(af.getName(), current.getFloatMetric(af.getName()));
                break;
            case LONG:
                value.getValue().put(af.getName(), current.getLongMetric(af.getName()));
                break;
        }
    }
    // 4) The post-aggregation columns
    for (PostAggregator pa : query.getPostAggregatorSpecs()) {
        assert extractors[counter++] == Extract.FLOAT;
        value.getValue().put(pa.getName(), current.getFloatMetric(pa.getName()));
    }
    return value;
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
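
Both next and getCurrentValue depend on an extractors array that pairs every aggregator and post-aggregator with a FLOAT or LONG accessor. Its initialization is not part of this excerpt; the sketch below shows one plausible derivation from the aggregator specs, with the "long"/"float" type-name strings being an assumption:

import java.util.List;
import io.druid.query.aggregation.AggregatorFactory;

public class ExtractorsSketch {
    enum Extract { FLOAT, LONG }

    // Hypothetical derivation; the real array is built elsewhere in
    // DruidGroupByQueryRecordReader
    static Extract[] buildExtractors(List<AggregatorFactory> aggregators, int postAggCount) {
        Extract[] extractors = new Extract[aggregators.size() + postAggCount];
        int i = 0;
        for (AggregatorFactory af : aggregators) {
            // Assumption: getTypeName() reports output types as strings
            // such as "long" or "float"
            extractors[i++] = "long".equalsIgnoreCase(af.getTypeName()) ? Extract.LONG : Extract.FLOAT;
        }
        while (i < extractors.length) {
            // Post-aggregators are always read back as floats in this reader
            extractors[i++] = Extract.FLOAT;
        }
        return extractors;
    }
}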

Aggregations

DimensionSpec (io.druid.query.dimension.DimensionSpec): 113
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 85
Test (org.junit.Test): 77
Row (io.druid.data.input.Row): 65
ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec): 61
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 60
ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec): 59
RegexFilteredDimensionSpec (io.druid.query.dimension.RegexFilteredDimensionSpec): 57
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 18
OrderByColumnSpec (io.druid.query.groupby.orderby.OrderByColumnSpec): 15
LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn): 13
HashMap (java.util.HashMap): 13
DateTime (org.joda.time.DateTime): 13
Interval (org.joda.time.Interval): 13
PostAggregator (io.druid.query.aggregation.PostAggregator): 12
CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory): 11
JavaScriptDimFilter (io.druid.query.filter.JavaScriptDimFilter): 11
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 10
DefaultLimitSpec (io.druid.query.groupby.orderby.DefaultLimitSpec): 10
ISE (io.druid.java.util.common.ISE): 9