Search in sources :

Example 36 with StringDimensionSchema

Use of org.apache.druid.data.input.impl.StringDimensionSchema in the Apache Hive project.

The method getDimensionsAndAggregates of the class DruidStorageHandlerUtils.

/**
 * Splits the given columns into dimension schemas and sum aggregators according to each
 * column's Hive primitive category.
 *
 * <ul>
 *   <li>BYTE, SHORT, INT and LONG columns yield a {@code LongSumAggregatorFactory}; FLOAT yields a
 *       {@code FloatSumAggregatorFactory}; DOUBLE yields a {@code DoubleSumAggregatorFactory}.
 *       Each aggregator uses the column name for both of its constructor arguments.</li>
 *   <li>TIMESTAMP columns are accepted only when named
 *       {@code Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME} or
 *       {@code DruidConstants.DEFAULT_TIMESTAMP_COLUMN}; TIMESTAMPLOCALTZ columns only when named
 *       {@code DruidConstants.DEFAULT_TIMESTAMP_COLUMN}. Accepted timestamp columns add nothing
 *       to either result.</li>
 *   <li>Every other column must belong to the string primitive grouping or be BOOLEAN, and is
 *       added as a {@code StringDimensionSchema}.</li>
 * </ul>
 *
 * @param columnNames column names, read at the same index as the matching {@code columnTypes} entry
 * @param columnTypes column types; every entry is cast to {@code PrimitiveTypeInfo}
 * @return a pair holding the dimension schemas and the aggregators, each in column order
 * @throws UnsupportedOperationException if a column has the DECIMAL category
 * @throws IllegalArgumentException if a timestamp column has an unexpected name, or a remaining
 *         column is neither in the string grouping nor BOOLEAN
 */
public static Pair<List<DimensionSchema>, AggregatorFactory[]> getDimensionsAndAggregates(List<String> columnNames, List<TypeInfo> columnTypes) {
    final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
    final List<AggregatorFactory> metricAggregators = new ArrayList<>();

    for (int idx = 0; idx < columnTypes.size(); idx++) {
        final PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) columnTypes.get(idx);
        final PrimitiveObjectInspector.PrimitiveCategory category = typeInfo.getPrimitiveCategory();
        final String name = columnNames.get(idx);

        switch (category) {
            case BYTE:
            case SHORT:
            case INT:
            case LONG:
                metricAggregators.add(new LongSumAggregatorFactory(name, name));
                break;
            case FLOAT:
                metricAggregators.add(new FloatSumAggregatorFactory(name, name));
                break;
            case DOUBLE:
                metricAggregators.add(new DoubleSumAggregatorFactory(name, name));
                break;
            case DECIMAL:
                throw new UnsupportedOperationException(
                        String.format("Druid does not support decimal column type cast column [%s] to double", name));
            case TIMESTAMP: {
                // Only the granularity column or the default timestamp column may carry this type.
                final boolean recognised = name.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME)
                        || name.equals(DruidConstants.DEFAULT_TIMESTAMP_COLUMN);
                if (!recognised) {
                    throw new IllegalArgumentException("Dimension " + name + " does not have STRING type: " + category);
                }
                break;
            }
            case TIMESTAMPLOCALTZ: {
                // Only the default timestamp column may carry this type.
                if (!name.equals(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
                    throw new IllegalArgumentException("Dimension " + name + " does not have STRING type: " + category);
                }
                break;
            }
            default: {
                // Anything left is a dimension, which must be string-like or boolean.
                final boolean stringLike = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(category)
                        == PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP;
                final boolean booleanTyped = category == PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN;
                if (!(stringLike || booleanTyped)) {
                    throw new IllegalArgumentException("Dimension " + name + " does not have STRING type: " + category);
                }
                dimensionSchemas.add(new StringDimensionSchema(name));
                break;
            }
        }
    }

    final AggregatorFactory[] aggregators = metricAggregators.toArray(new AggregatorFactory[0]);
    return Pair.of(dimensionSchemas, aggregators);
}
Also used : DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)

Aggregations

StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema)36 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)30 Test (org.junit.Test)24 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)19 LongDimensionSchema (org.apache.druid.data.input.impl.LongDimensionSchema)15 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)15 FloatDimensionSchema (org.apache.druid.data.input.impl.FloatDimensionSchema)14 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)12 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)11 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)11 File (java.io.File)8 ArrayList (java.util.ArrayList)8 Before (org.junit.Before)8 ImmutableList (com.google.common.collect.ImmutableList)7 HashMap (java.util.HashMap)7 DataSchema (org.apache.druid.segment.indexing.DataSchema)7 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)7 List (java.util.List)6 SupervisorStateManagerConfig (org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig)6 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5