Search in sources :

Example 61 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class SerdeRandomRowSource method getWritableObject.

public Object getWritableObject(int column, Object object) {
    ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
    PrimitiveCategory primitiveCategory = primitiveCategories[column];
    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
    switch(primitiveCategory) {
        case BOOLEAN:
            return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
        case BYTE:
            return ((WritableByteObjectInspector) objectInspector).create((byte) object);
        case SHORT:
            return ((WritableShortObjectInspector) objectInspector).create((short) object);
        case INT:
            return ((WritableIntObjectInspector) objectInspector).create((int) object);
        case LONG:
            return ((WritableLongObjectInspector) objectInspector).create((long) object);
        case DATE:
            return ((WritableDateObjectInspector) objectInspector).create((Date) object);
        case FLOAT:
            return ((WritableFloatObjectInspector) objectInspector).create((float) object);
        case DOUBLE:
            return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
        case STRING:
            return ((WritableStringObjectInspector) objectInspector).create((String) object);
        case CHAR:
            {
                WritableHiveCharObjectInspector writableCharObjectInspector = new WritableHiveCharObjectInspector((CharTypeInfo) primitiveTypeInfo);
                return writableCharObjectInspector.create((HiveChar) object);
            }
        case VARCHAR:
            {
                WritableHiveVarcharObjectInspector writableVarcharObjectInspector = new WritableHiveVarcharObjectInspector((VarcharTypeInfo) primitiveTypeInfo);
                return writableVarcharObjectInspector.create((HiveVarchar) object);
            }
        case BINARY:
            return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create((byte[]) object);
        case TIMESTAMP:
            return ((WritableTimestampObjectInspector) objectInspector).create((Timestamp) object);
        case INTERVAL_YEAR_MONTH:
            return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create((HiveIntervalYearMonth) object);
        case INTERVAL_DAY_TIME:
            return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create((HiveIntervalDayTime) object);
        case DECIMAL:
            {
                WritableHiveDecimalObjectInspector writableDecimalObjectInspector = new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
                return writableDecimalObjectInspector.create((HiveDecimal) object);
            }
        default:
            throw new Error("Unknown primitive category " + primitiveCategory);
    }
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) WritableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) WritableHiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) WritableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) WritableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) WritableHiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector) WritableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector) WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector) WritableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector) WritableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector) WritableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector) WritableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector) WritableHiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector) WritableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector) WritableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) WritableHiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) WritableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) WritableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) WritableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector) WritableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector) WritableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector) WritableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector)

Example 62 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project presto by prestodb.

the class HiveBucketing method getJavaObjectInspector.

private static ObjectInspector getJavaObjectInspector(ObjectInspector objectInspector) {
    checkArgument(objectInspector.getCategory() == Category.PRIMITIVE, "Unsupported object inspector category %s", objectInspector.getCategory());
    PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) objectInspector);
    switch(poi.getPrimitiveCategory()) {
        case BOOLEAN:
            return javaBooleanObjectInspector;
        case BYTE:
            return javaByteObjectInspector;
        case SHORT:
            return javaShortObjectInspector;
        case INT:
            return javaIntObjectInspector;
        case LONG:
            return javaLongObjectInspector;
        case STRING:
            return javaStringObjectInspector;
    }
    throw new RuntimeException("Unsupported type: " + poi.getPrimitiveCategory());
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)

Example 63 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class VectorizedListColumnReader method readBatch.

@Override
public void readBatch(int total, ColumnVector column, TypeInfo columnType) throws IOException {
    ListColumnVector lcv = (ListColumnVector) column;
    // before readBatch, initial the size of offsets & lengths as the default value,
    // the actual size will be assigned in setChildrenInfo() after reading complete.
    lcv.offsets = new long[VectorizedRowBatch.DEFAULT_SIZE];
    lcv.lengths = new long[VectorizedRowBatch.DEFAULT_SIZE];
    // Because the length of ListColumnVector.child can't be known now,
    // the valueList will save all data for ListColumnVector temporary.
    List<Object> valueList = new ArrayList<>();
    PrimitiveObjectInspector.PrimitiveCategory category = ((PrimitiveTypeInfo) ((ListTypeInfo) columnType).getListElementTypeInfo()).getPrimitiveCategory();
    // read the first row in parquet data page, this will be only happened once for this instance
    if (isFirstRow) {
        if (!fetchNextValue(category)) {
            return;
        }
        isFirstRow = false;
    }
    int index = 0;
    while (!eof && index < total) {
        // add element to ListColumnVector one by one
        addElement(lcv, valueList, category, index);
        index++;
    }
    // Decode the value if necessary
    if (isCurrentPageDictionaryEncoded) {
        valueList = decodeDictionaryIds(category, valueList);
    }
    // Convert valueList to array for the ListColumnVector.child
    convertValueListToListColumnVector(category, lcv, valueList, index);
}
Also used : ListColumnVector(org.apache.hadoop.hive.ql.exec.vector.ListColumnVector) ArrayList(java.util.ArrayList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 64 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class VectorizedParquetRecordReader method buildVectorizedParquetReader.

// Build VectorizedParquetColumnReader via Hive typeInfo and Parquet schema
private VectorizedColumnReader buildVectorizedParquetReader(TypeInfo typeInfo, Type type, PageReadStore pages, List<ColumnDescriptor> columnDescriptors, boolean skipTimestampConversion, int depth) throws IOException {
    List<ColumnDescriptor> descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors);
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            if (columnDescriptors == null || columnDescriptors.isEmpty()) {
                throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
            }
            if (fileSchema.getColumns().contains(descriptors.get(0))) {
                return new VectorizedPrimitiveColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type, typeInfo);
            } else {
                // Support for schema evolution
                return new VectorizedDummyColumnReader();
            }
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<VectorizedColumnReader> fieldReaders = new ArrayList<>();
            List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
            List<Type> types = type.asGroupType().getFields();
            for (int i = 0; i < fieldTypes.size(); i++) {
                VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, skipTimestampConversion, depth + 1);
                if (r != null) {
                    fieldReaders.add(r);
                } else {
                    throw new RuntimeException("Fail to build Parquet vectorized reader based on Hive type " + fieldTypes.get(i).getTypeName() + " and Parquet type" + types.get(i).toString());
                }
            }
            return new VectorizedStructColumnReader(fieldReaders);
        case LIST:
            checkListColumnSupport(((ListTypeInfo) typeInfo).getListElementTypeInfo());
            if (columnDescriptors == null || columnDescriptors.isEmpty()) {
                throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
            }
            return new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, getElementType(type), typeInfo);
        case MAP:
            if (columnDescriptors == null || columnDescriptors.isEmpty()) {
                throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
            }
            // to handle the different Map definition in Parquet, eg:
            // definition has 1 group:
            // repeated group map (MAP_KEY_VALUE)
            // {required binary key (UTF8); optional binary value (UTF8);}
            // definition has 2 groups:
            // optional group m1 (MAP) {
            // repeated group map (MAP_KEY_VALUE)
            // {required binary key (UTF8); optional binary value (UTF8);}
            // }
            int nestGroup = 0;
            GroupType groupType = type.asGroupType();
            // otherwise, continue to get the group type until MAP_DEFINITION_LEVEL_MAX.
            while (groupType.getFieldCount() < 2) {
                if (nestGroup > MAP_DEFINITION_LEVEL_MAX) {
                    throw new RuntimeException("More than " + MAP_DEFINITION_LEVEL_MAX + " level is found in Map definition, " + "Failed to get the field types for Map with type " + type);
                }
                groupType = groupType.getFields().get(0).asGroupType();
                nestGroup++;
            }
            List<Type> kvTypes = groupType.getFields();
            VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, kvTypes.get(0), typeInfo);
            VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader(descriptors.get(1), pages.getPageReader(descriptors.get(1)), skipTimestampConversion, kvTypes.get(1), typeInfo);
            return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader);
        case UNION:
        default:
            throw new RuntimeException("Unsupported category " + typeInfo.getCategory().name());
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveType(org.apache.parquet.schema.PrimitiveType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) ParquetRuntimeException(org.apache.parquet.ParquetRuntimeException) GroupType(org.apache.parquet.schema.GroupType)

Example 65 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class PushdownTuple method getConstantAsBytes.

/**
 * @return byte [] value from writable.
 * @throws SerDeException
 */
public byte[] getConstantAsBytes(Writable writable) throws SerDeException {
    if (pCompare instanceof StringCompare) {
        return writable.toString().getBytes();
    } else if (pCompare instanceof DoubleCompare) {
        byte[] bts = new byte[8];
        double val = ((DoubleWritable) writable).get();
        ByteBuffer.wrap(bts).putDouble(val);
        return bts;
    } else if (pCompare instanceof IntCompare) {
        byte[] bts = new byte[4];
        int val = ((IntWritable) writable).get();
        ByteBuffer.wrap(bts).putInt(val);
        return bts;
    } else if (pCompare instanceof LongCompare) {
        byte[] bts = new byte[8];
        long val = ((LongWritable) writable).get();
        ByteBuffer.wrap(bts).putLong(val);
        return bts;
    } else {
        throw new SerDeException("Unsupported primitive category: " + pCompare.getClass().getName());
    }
}
Also used : LongCompare(org.apache.hadoop.hive.accumulo.predicate.compare.LongCompare) StringCompare(org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare) IntCompare(org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare) DoubleCompare(org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare) IntWritable(org.apache.hadoop.io.IntWritable) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)44 Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category)33 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)31 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)27 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)26 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)26 ArrayList (java.util.ArrayList)22 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)20 BytesWritable (org.apache.hadoop.io.BytesWritable)19 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)18 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)18 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)18 Text (org.apache.hadoop.io.Text)18 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)17 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)17 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)16 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)16 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)16 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)16 IntWritable (org.apache.hadoop.io.IntWritable)16