
Example 71 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project cdap by caskdata.

From class ObjectInspectorFactory, method getReflectionObjectInspectorNoCache.

private static ObjectInspector getReflectionObjectInspectorNoCache(Type t) {
    if (t instanceof GenericArrayType) {
        GenericArrayType at = (GenericArrayType) t;
        return getStandardListObjectInspector(getReflectionObjectInspector(at.getGenericComponentType()));
    }
    Map<TypeVariable, Type> genericTypes = null;
    if (t instanceof ParameterizedType) {
        ParameterizedType pt = (ParameterizedType) t;
        Type rawType = pt.getRawType();
        // Collection?
        if (Collection.class.isAssignableFrom((Class<?>) rawType)) {
            return getStandardListObjectInspector(getReflectionObjectInspector(pt.getActualTypeArguments()[0]));
        }
        // Map?
        if (Map.class.isAssignableFrom((Class<?>) rawType)) {
            return getStandardMapObjectInspector(getReflectionObjectInspector(pt.getActualTypeArguments()[0]), getReflectionObjectInspector(pt.getActualTypeArguments()[1]));
        }
        // Otherwise convert t to RawType so we will fall into the following if block.
        t = rawType;
        ImmutableMap.Builder<TypeVariable, Type> builder = ImmutableMap.builder();
        for (int i = 0; i < pt.getActualTypeArguments().length; i++) {
            builder.put(((Class<?>) t).getTypeParameters()[i], pt.getActualTypeArguments()[i]);
        }
        genericTypes = builder.build();
    }
    // Must be a class.
    if (!(t instanceof Class)) {
        throw new RuntimeException(ObjectInspectorFactory.class.getName() + " internal error: " + t);
    }
    Class<?> c = (Class<?>) t;
    // Java Primitive Type?
    if (PrimitiveObjectInspectorUtils.isPrimitiveJavaType(c)) {
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaType(c).primitiveCategory);
    }
    // Java Primitive Class?
    if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(c)) {
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(c).primitiveCategory);
    }
    // Primitive Writable class?
    if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(c)) {
        return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveWritableClass(c).primitiveCategory);
    }
    // Enum class?
    if (Enum.class.isAssignableFrom(c)) {
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
    }
    // Array
    if (c.isArray()) {
        return getStandardListObjectInspector(getReflectionObjectInspector(c.getComponentType()));
    }
    // Must be struct because List and Map need to be ParameterizedType
    Preconditions.checkState(!List.class.isAssignableFrom(c));
    Preconditions.checkState(!Map.class.isAssignableFrom(c));
    Preconditions.checkState(!c.isInterface(), "Cannot inspect an interface.");
    ReflectionStructObjectInspector oi = new ReflectionStructObjectInspector();
    // put it into the cache BEFORE it is initialized to make sure we can catch
    // recursive types.
    objectInspectorCache.put(t, oi);
    Field[] fields = ObjectInspectorUtils.getDeclaredNonStaticFields(c);
    List<ObjectInspector> structFieldObjectInspectors = new ArrayList<>(fields.length);
    for (Field field : fields) {
        // "this" pointer present in nested classes and that references the parent.
        if (Modifier.isTransient(field.getModifiers()) || field.isSynthetic()) {
            continue;
        }
        if (!oi.shouldIgnoreField(field.getName())) {
            Type newType = field.getGenericType();
            if (newType instanceof TypeVariable) {
                Preconditions.checkNotNull(genericTypes, "Type was not recognized as a parameterized type.");
                Preconditions.checkNotNull(genericTypes.get(newType), "Generic type " + newType + " not a parameter of class " + c);
                newType = genericTypes.get(newType);
            }
            structFieldObjectInspectors.add(getReflectionObjectInspector(newType));
        }
    }
    oi.init(c, structFieldObjectInspectors);
    return oi;
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) GenericArrayType(java.lang.reflect.GenericArrayType) ImmutableMap(com.google.common.collect.ImmutableMap) ParameterizedType(java.lang.reflect.ParameterizedType) Field(java.lang.reflect.Field) Type(java.lang.reflect.Type) TypeVariable(java.lang.reflect.TypeVariable) PrimitiveObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory)
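
The struct branch at the end of the method is easiest to see with a plain POJO. The sketch below is not from the CDAP source; it assumes the single-argument getReflectionObjectInspector(Type) overload used above and a hypothetical Person class, and simply walks the fields of the resulting struct inspector.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
// Import for ObjectInspectorFactory omitted: it refers to the CDAP class shown above.

public class ReflectionInspectorSketch {

    // Hypothetical POJO, used only for illustration.
    public static class Person {
        public String name;
        public int age;
    }

    public static void main(String[] args) {
        // Primitive Java types resolve to primitive inspectors; a POJO like Person
        // falls through to the struct branch at the bottom of the method.
        ObjectInspector oi = ObjectInspectorFactory.getReflectionObjectInspector(Person.class);
        StructObjectInspector soi = (StructObjectInspector) oi;
        for (StructField field : soi.getAllStructFieldRefs()) {
            System.out.println(field.getFieldName() + " -> "
                + field.getFieldObjectInspector().getTypeName());
        }
    }
}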

Example 72 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

From class DruidOutputFormat, method getHiveRecordWriter.

@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    final String segmentGranularity = tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY) != null ? tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY) : HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_INDEXING_GRANULARITY);
    final int targetNumShardsPerGranularity = Integer.parseUnsignedInt(tableProperties.getProperty(Constants.DRUID_TARGET_SHARDS_PER_GRANULARITY, "0"));
    final int maxPartitionSize = targetNumShardsPerGranularity > 0 ? -1 : HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE);
    // If datasource is in the table properties, it is an INSERT/INSERT OVERWRITE as the datasource
    // name was already persisted. Otherwise, it is a CT/CTAS and we need to get the name from the
    // job properties that are set by configureOutputJobProperties in the DruidStorageHandler
    final String dataSource = tableProperties.getProperty(Constants.DRUID_DATA_SOURCE) == null ? jc.get(Constants.DRUID_DATA_SOURCE) : tableProperties.getProperty(Constants.DRUID_DATA_SOURCE);
    final String segmentDirectory = jc.get(Constants.DRUID_SEGMENT_INTERMEDIATE_DIRECTORY);
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularity.fromString(segmentGranularity), Granularity.fromString(tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY) == null ? "NONE" : tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY)), null);
    final String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    if (StringUtils.isEmpty(columnNameProperty) || StringUtils.isEmpty(columnTypeProperty)) {
        throw new IllegalStateException(String.format("List of column names [%s] or column types [%s] is not present", columnNameProperty, columnTypeProperty));
    }
    ArrayList<String> columnNames = new ArrayList<String>();
    for (String name : columnNameProperty.split(",")) {
        columnNames.add(name);
    }
    if (!columnNames.contains(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) {
        throw new IllegalStateException("Timestamp column ('" + DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN + "') not specified in create table; list of columns is: " + tableProperties.getProperty(serdeConstants.LIST_COLUMNS));
    }
    ArrayList<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    final boolean approximationAllowed = HiveConf.getBoolVar(jc, HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT);
    // Default, all columns that are not metrics or timestamp, are treated as dimensions
    final List<DimensionSchema> dimensions = new ArrayList<>();
    ImmutableList.Builder<AggregatorFactory> aggregatorFactoryBuilder = ImmutableList.builder();
    for (int i = 0; i < columnTypes.size(); i++) {
        final PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) columnTypes.get(i)).getPrimitiveCategory();
        AggregatorFactory af;
        switch(primitiveCategory) {
            case BYTE:
            case SHORT:
            case INT:
            case LONG:
                af = new LongSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
                break;
            case FLOAT:
            case DOUBLE:
                af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
                break;
            case DECIMAL:
                if (approximationAllowed) {
                    af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
                } else {
                    throw new UnsupportedOperationException(String.format("Druid does not support the decimal column type. " + "Either cast column [%s] to double or enable approximate results for Druid by setting property [%s] to true", columnNames.get(i), HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT.varname));
                }
                break;
            case TIMESTAMP:
                // Granularity column
                String tColumnName = columnNames.get(i);
                if (!tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME)) {
                    throw new IOException("Dimension " + tColumnName + " does not have STRING type: " + primitiveCategory);
                }
                continue;
            case TIMESTAMPLOCALTZ:
                // Druid timestamp column
                String tLocalTZColumnName = columnNames.get(i);
                if (!tLocalTZColumnName.equals(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) {
                    throw new IOException("Dimension " + tLocalTZColumnName + " does not have STRING type: " + primitiveCategory);
                }
                continue;
            default:
                // Dimension
                String dColumnName = columnNames.get(i);
                if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(primitiveCategory) != PrimitiveGrouping.STRING_GROUP && primitiveCategory != PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN) {
                    throw new IOException("Dimension " + dColumnName + " does not have STRING type: " + primitiveCategory);
                }
                dimensions.add(new StringDimensionSchema(dColumnName));
                continue;
        }
        aggregatorFactoryBuilder.add(af);
    }
    List<AggregatorFactory> aggregatorFactories = aggregatorFactoryBuilder.build();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(dimensions, Lists.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Constants.DRUID_SHARD_KEY_COL_NAME), null)));
    Map<String, Object> inputParser = DruidStorageHandlerUtils.JSON_MAPPER.convertValue(inputRowParser, Map.class);
    final DataSchema dataSchema = new DataSchema(Preconditions.checkNotNull(dataSource, "Data source name is null"), inputParser, aggregatorFactories.toArray(new AggregatorFactory[aggregatorFactories.size()]), granularitySpec, DruidStorageHandlerUtils.JSON_MAPPER);
    final String workingPath = jc.get(Constants.DRUID_JOB_WORKING_DIRECTORY);
    final String version = jc.get(Constants.DRUID_SEGMENT_VERSION);
    String basePersistDirectory = HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BASE_PERSIST_DIRECTORY);
    if (Strings.isNullOrEmpty(basePersistDirectory)) {
        basePersistDirectory = System.getProperty("java.io.tmpdir");
    }
    Integer maxRowInMemory = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_ROW_IN_MEMORY);
    IndexSpec indexSpec;
    if ("concise".equals(HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BITMAP_FACTORY_TYPE))) {
        indexSpec = new IndexSpec(new ConciseBitmapSerdeFactory(), null, null, null);
    } else {
        indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
    }
    RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(maxRowInMemory, null, null, new File(basePersistDirectory, dataSource), new CustomVersioningPolicy(version), null, null, null, indexSpec, true, 0, 0, true, null, 0L);
    LOG.debug(String.format("running with Data schema [%s] ", dataSchema));
    return new DruidRecordWriter(dataSchema, realtimeTuningConfig, DruidStorageHandlerUtils.createSegmentPusherForDirectory(segmentDirectory, jc), maxPartitionSize, new Path(workingPath, SEGMENTS_DESCRIPTOR_DIR_NAME), finalOutPath.getFileSystem(jc));
}
Also used : IndexSpec(io.druid.segment.IndexSpec) MapInputRowParser(io.druid.data.input.impl.MapInputRowParser) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DimensionSchema(io.druid.data.input.impl.DimensionSchema) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) RoaringBitmapSerdeFactory(io.druid.segment.data.RoaringBitmapSerdeFactory) ConciseBitmapSerdeFactory(io.druid.segment.data.ConciseBitmapSerdeFactory) TimestampSpec(io.druid.data.input.impl.TimestampSpec) Path(org.apache.hadoop.fs.Path) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) IOException(java.io.IOException) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) DataSchema(io.druid.segment.indexing.DataSchema) GranularitySpec(io.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) InputRowParser(io.druid.data.input.impl.InputRowParser) CustomVersioningPolicy(io.druid.segment.realtime.plumber.CustomVersioningPolicy) File(java.io.File)
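
The numeric mapping inside the loop can be read in isolation. The following is a condensed sketch, not a method that exists in DruidOutputFormat, of how a Hive PrimitiveCategory might be translated into a Druid aggregator using the same io.druid classes imported above.

// Sketch only: integral categories sum as longs, floating-point categories as
// doubles, and DECIMAL is accepted only when approximate results are allowed.
static AggregatorFactory toMetricAggregator(String column,
        PrimitiveObjectInspector.PrimitiveCategory category,
        boolean approximationAllowed) {
    switch(category) {
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            return new LongSumAggregatorFactory(column, column);
        case FLOAT:
        case DOUBLE:
            return new DoubleSumAggregatorFactory(column, column);
        case DECIMAL:
            if (approximationAllowed) {
                return new DoubleSumAggregatorFactory(column, column);
            }
            throw new UnsupportedOperationException(
                "Cast column " + column + " to double or enable approximate results for Druid");
        default:
            // Everything else is modeled as a dimension, not a metric.
            throw new IllegalArgumentException(column + " is not a metric column: " + category);
    }
}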

Example 73 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

From class HiveHBaseInputFormatUtil, method getConstantVal.

static byte[] getConstantVal(Object writable, PrimitiveObjectInspector poi, boolean isKeyBinary) throws IOException {
    if (!isKeyBinary) {
        // Key is stored in text format. Get bytes representation of constant also of
        // text format.
        byte[] startRow;
        ByteStream.Output serializeStream = new ByteStream.Output();
        LazyUtils.writePrimitiveUTF8(serializeStream, writable, poi, false, (byte) 0, null);
        startRow = new byte[serializeStream.getLength()];
        System.arraycopy(serializeStream.getData(), 0, startRow, 0, serializeStream.getLength());
        return startRow;
    }
    PrimitiveCategory pc = poi.getPrimitiveCategory();
    switch(pc) {
        case INT:
            return Bytes.toBytes(((IntWritable) writable).get());
        case BOOLEAN:
            return Bytes.toBytes(((BooleanWritable) writable).get());
        case LONG:
            return Bytes.toBytes(((LongWritable) writable).get());
        case FLOAT:
            return Bytes.toBytes(((FloatWritable) writable).get());
        case DOUBLE:
            return Bytes.toBytes(((DoubleWritable) writable).get());
        case SHORT:
            return Bytes.toBytes(((ShortWritable) writable).get());
        case STRING:
            return Bytes.toBytes(((Text) writable).toString());
        case BYTE:
            return Bytes.toBytes(((ByteWritable) writable).get());
        default:
            throw new IOException("Type not supported " + pc);
    }
}
Also used : ByteStream(org.apache.hadoop.hive.serde2.ByteStream) IOException(java.io.IOException) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)
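
As a rough usage sketch (the method is package-private, so this assumes a caller in the same package), a binary row key for an integer constant could be produced as follows, using the writable int inspector singleton from PrimitiveObjectInspectorFactory.

// Sketch only, not part of HiveHBaseInputFormatUtil.
static byte[] binaryIntKeyExample() throws IOException {
    IntWritable constant = new IntWritable(42);
    // isKeyBinary == true: the INT branch returns Bytes.toBytes(42), the 4-byte
    // big-endian encoding HBase uses for binary keys.
    byte[] binaryKey = HiveHBaseInputFormatUtil.getConstantVal(
        constant, PrimitiveObjectInspectorFactory.writableIntObjectInspector, true);
    // With isKeyBinary == false the same constant would instead be serialized
    // as UTF-8 text through LazyUtils.writePrimitiveUTF8.
    return binaryKey;
}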

Example 74 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

From class VectorAssignRow, method initTargetEntry.

/*
   * Initialize one column's target-related arrays.
   */
private void initTargetEntry(int logicalColumnIndex, int projectionColumnNum, TypeInfo typeInfo) {
    isConvert[logicalColumnIndex] = false;
    projectionColumnNums[logicalColumnIndex] = projectionColumnNum;
    targetTypeInfos[logicalColumnIndex] = typeInfo;
    if (typeInfo.getCategory() == Category.PRIMITIVE) {
        final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
        final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        switch(primitiveCategory) {
            case CHAR:
                maxLengths[logicalColumnIndex] = ((CharTypeInfo) primitiveTypeInfo).getLength();
                break;
            case VARCHAR:
                maxLengths[logicalColumnIndex] = ((VarcharTypeInfo) primitiveTypeInfo).getLength();
                break;
            default:
                // No additional data type specific setting.
                break;
        }
    }
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
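
The CHAR and VARCHAR branches are the only ones that record extra state. Below is a small sketch (identifiers hypothetical, not part of VectorAssignRow) of the type infos that reach them, built with TypeInfoFactory from org.apache.hadoop.hive.serde2.typeinfo.

// Sketch: bounded character types carry a length that initTargetEntry copies
// into maxLengths for later truncation or padding.
static void charLengthSketch() {
    TypeInfo charType = TypeInfoFactory.getCharTypeInfo(10);       // CHAR(10)
    TypeInfo varcharType = TypeInfoFactory.getVarcharTypeInfo(50); // VARCHAR(50)

    PrimitiveCategory charCategory =
        ((PrimitiveTypeInfo) charType).getPrimitiveCategory();          // CHAR
    int charMaxLength = ((CharTypeInfo) charType).getLength();          // 10
    int varcharMaxLength = ((VarcharTypeInfo) varcharType).getLength(); // 50
}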

Example 75 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

From class VectorizationContext, method getVectorTypeScalarValue.

private Object getVectorTypeScalarValue(ExprNodeConstantDesc constDesc) throws HiveException {
    TypeInfo typeInfo = constDesc.getTypeInfo();
    PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
    Object scalarValue = getScalarValue(constDesc);
    switch(primitiveCategory) {
        case DATE:
            return (long) DateWritable.dateToDays((Date) scalarValue);
        case INTERVAL_YEAR_MONTH:
            return ((HiveIntervalYearMonth) scalarValue).getTotalMonths();
        default:
            return scalarValue;
    }
}
Also used : HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Date(java.sql.Date)
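
A tiny illustration (values assumed, not from the source) of what the DATE branch produces: vectorized expressions represent dates as long counts of days since the epoch, so the java.sql.Date constant is flattened with DateWritable.dateToDays.

// Sketch only.
static void dateScalarSketch() {
    Date d = Date.valueOf("1970-01-02");              // java.sql.Date constant
    long daysSinceEpoch = DateWritable.dateToDays(d); // 1 (days since 1970-01-01)
    // INTERVAL_YEAR_MONTH constants are flattened the same way, to total months
    // via HiveIntervalYearMonth.getTotalMonths(); other categories pass through.
}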

Aggregations

PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 84 usages
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 45 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 26 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 17 usages
ArrayList (java.util.ArrayList): 15 usages
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 15 usages
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 14 usages
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 12 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 12 usages
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 11 usages
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 11 usages
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 11 usages
Test (org.junit.Test): 11 usages
UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException): 10 usages
Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category): 10 usages
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 9 usages
ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector): 9 usages
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 8 usages
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 8 usages
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable): 8 usages
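
The two most frequent co-occurrences above, PrimitiveTypeInfo and PrimitiveObjectInspector, reflect the two standard routes to a PrimitiveCategory. A minimal sketch of both, using generic identifiers rather than code from any single example:

// Sketch only.
static void primitiveCategorySketch() {
    // From type metadata: PrimitiveTypeInfo exposes the category directly.
    PrimitiveTypeInfo intTypeInfo = TypeInfoFactory.intTypeInfo;
    PrimitiveCategory fromTypeInfo = intTypeInfo.getPrimitiveCategory();    // INT

    // From an object inspector: PrimitiveObjectInspector exposes the same enum.
    PrimitiveObjectInspector intInspector =
        PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    PrimitiveCategory fromInspector = intInspector.getPrimitiveCategory();  // INT
}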