Search in sources :

Example 26 with IntObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.

the class GenericUDFFormatNumber method evaluate.

/**
 * Formats the numeric first argument with thousands grouping and, unless the second argument
 * is of STRING category, with exactly as many decimal places as the second argument requests.
 *
 * <p>The decimal pattern is rebuilt only when the requested number of decimal places differs
 * from the one used by the previous call ({@code lastDValue}).
 *
 * @param arguments deferred arguments; index 0 is the number to format, index 1 the
 *                  decimal-places argument
 * @return the {@code resultText} field holding the formatted number, or {@code null} when either
 *         argument evaluates to {@code null}
 * @throws HiveException if the decimal-places argument is negative, or if the first argument's
 *                       primitive category is not one of the supported numeric categories
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    // The second argument is only fetched when the first one is non-null.
    final Object arg0 = arguments[0].get();
    if (arg0 == null) {
        return null;
    }
    final Object arg1 = arguments[1].get();
    if (arg1 == null) {
        return null;
    }
    if (!dType.equals(PrimitiveCategory.STRING)) {
        int dValue = ((IntObjectInspector) argumentOIs[1]).get(arg1);
        if (dValue < 0) {
            throw new HiveException("Argument 2 of function FORMAT_NUMBER must be >= 0, but \"" + dValue + "\" was found");
        }
        if (dValue != lastDValue) {
            // construct a new DecimalFormat only if a new dValue
            pattern.delete(0, pattern.length());
            pattern.append("#,###,###,###,###,###,##0");
            // decimal place
            if (dValue > 0) {
                pattern.append(".");
                for (int i = 0; i < dValue; i++) {
                    pattern.append("0");
                }
            }
            // Round-trip through DecimalFormat so the pattern handed to numberFormat is normalized.
            DecimalFormat dFormat = new DecimalFormat(pattern.toString());
            lastDValue = dValue;
            numberFormat.applyPattern(dFormat.toPattern());
        }
    }
    PrimitiveObjectInspector xObjectInspector = (PrimitiveObjectInspector) argumentOIs[0];
    switch(xObjectInspector.getPrimitiveCategory()) {
        case VOID:
        case DOUBLE:
            final double xDoubleValue = ((DoubleObjectInspector) argumentOIs[0]).get(arg0);
            resultText.set(numberFormat.format(xDoubleValue));
            break;
        case FLOAT:
            final float xFloatValue = ((FloatObjectInspector) argumentOIs[0]).get(arg0);
            resultText.set(numberFormat.format(xFloatValue));
            break;
        case DECIMAL:
            final HiveDecimal xDecimalValue = ((HiveDecimalObjectInspector) argumentOIs[0]).getPrimitiveJavaObject(arg0);
            resultText.set(numberFormat.format(xDecimalValue.bigDecimalValue()));
            break;
        case BYTE:
        case SHORT:
        case INT:
            // NOTE(review): BYTE and SHORT inputs are cast to IntObjectInspector here; confirm the
            // inspectors supplied for those categories actually implement it.
            final int xIntValue = ((IntObjectInspector) argumentOIs[0]).get(arg0);
            resultText.set(numberFormat.format(xIntValue));
            break;
        case LONG:
            final long xLongValue = ((LongObjectInspector) argumentOIs[0]).get(arg0);
            resultText.set(numberFormat.format(xLongValue));
            break;
        default:
            // Every type name is wrapped in double quotes, including the first one.
            throw new HiveException("Argument 1 of function FORMAT_NUMBER must be \"" + serdeConstants.TINYINT_TYPE_NAME + "\"" + " or \"" + serdeConstants.SMALLINT_TYPE_NAME + "\"" + " or \"" + serdeConstants.INT_TYPE_NAME + "\"" + " or \"" + serdeConstants.BIGINT_TYPE_NAME + "\"" + " or \"" + serdeConstants.DOUBLE_TYPE_NAME + "\"" + " or \"" + serdeConstants.FLOAT_TYPE_NAME + "\"" + " or \"" + serdeConstants.DECIMAL_TYPE_NAME + "\", but \"" + argumentOIs[0].getTypeName() + "\" was found.");
    }
    return resultText;
}
Also used : IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DecimalFormat(java.text.DecimalFormat) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)

Example 27 with IntObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.

the class GenericUDFBaseCompare method initForPrimitives.

/**
 * Selects the comparison strategy for two primitive arguments and caches the object inspectors
 * (or converters) that strategy needs.
 *
 * <p>Matching string, int, long, byte and boolean pairs get a dedicated compare type. Any other
 * pair is either treated as the same type or converted to a common type, falling back to double
 * when no common comparison type is found.
 *
 * @param arg0 object inspector of the left operand; asserted to be primitive
 * @param arg1 object inspector of the right operand; asserted to be primitive
 * @throws UDFArgumentException declared by this method; presumably raised by the conversion checks
 */
private void initForPrimitives(ObjectInspector arg0, ObjectInspector arg1) throws UDFArgumentException {
    assert arg0.getCategory() == Category.PRIMITIVE;
    assert arg1.getCategory() == Category.PRIMITIVE;
    final TypeInfo leftType = TypeInfoUtils.getTypeInfoFromObjectInspector(arg0);
    final TypeInfo rightType = TypeInfoUtils.getTypeInfoFromObjectInspector(arg1);

    // string vs string: compare as Text when either side prefers writables
    if (leftType.equals(TypeInfoFactory.stringTypeInfo) && rightType.equals(TypeInfoFactory.stringTypeInfo)) {
        soi0 = (StringObjectInspector) arg0;
        soi1 = (StringObjectInspector) arg1;
        final boolean anyPrefersWritable = soi0.preferWritable() || soi1.preferWritable();
        compareType = anyPrefersWritable ? CompareType.COMPARE_TEXT : CompareType.COMPARE_STRING;
        return;
    }

    // int vs int
    if (leftType.equals(TypeInfoFactory.intTypeInfo) && rightType.equals(TypeInfoFactory.intTypeInfo)) {
        compareType = CompareType.COMPARE_INT;
        ioi0 = (IntObjectInspector) arg0;
        ioi1 = (IntObjectInspector) arg1;
        return;
    }

    // long vs long
    if (leftType.equals(TypeInfoFactory.longTypeInfo) && rightType.equals(TypeInfoFactory.longTypeInfo)) {
        compareType = CompareType.COMPARE_LONG;
        loi0 = (LongObjectInspector) arg0;
        loi1 = (LongObjectInspector) arg1;
        return;
    }

    // byte vs byte
    if (leftType.equals(TypeInfoFactory.byteTypeInfo) && rightType.equals(TypeInfoFactory.byteTypeInfo)) {
        compareType = CompareType.COMPARE_BYTE;
        byoi0 = (ByteObjectInspector) arg0;
        byoi1 = (ByteObjectInspector) arg1;
        return;
    }

    // boolean vs boolean
    if (leftType.equals(TypeInfoFactory.booleanTypeInfo) && rightType.equals(TypeInfoFactory.booleanTypeInfo)) {
        compareType = CompareType.COMPARE_BOOL;
        boi0 = (BooleanObjectInspector) arg0;
        boi1 = (BooleanObjectInspector) arg1;
        return;
    }

    // Remaining pairs: identical or category-matching types need no conversion.
    if (leftType == rightType || TypeInfoUtils.doPrimitiveCategoriesMatch(leftType, rightType)) {
        compareType = CompareType.SAME_TYPE;
        return;
    }

    compareType = CompareType.NEED_CONVERT;
    final TypeInfo commonType = FunctionRegistry.getCommonClassForComparison(leftType, rightType);
    // For now, we always convert to double if we can't find a common type
    final TypeInfo targetType = (commonType == null) ? TypeInfoFactory.doubleTypeInfo : commonType;
    compareOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(targetType);
    converter0 = ObjectInspectorConverters.getConverter(arg0, compareOI);
    converter1 = ObjectInspectorConverters.getConverter(arg1, compareOI);
    checkConversionAllowed(arg0, compareOI);
    checkConversionAllowed(arg1, compareOI);
}
Also used : ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 28 with IntObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.

the class TestVectorCoalesceElt method doCoalesceOnRandomDataType.

/**
 * Runs one randomized COALESCE or ELT comparison: builds the expression over random columns and
 * constants, evaluates it in every {@code CoalesceEltTestMode}, and fails the test when any
 * mode's result differs from the result stored at index 0 (row mode, per the comment below).
 *
 * @param random              source of all random choices; the order of draws is significant
 * @param iteration           iteration number, used in failure messages and passed to the vector test
 * @param isCoalesce          {@code true} to test COALESCE, {@code false} to test ELT
 * @param isEltIndexConst     for ELT, whether the index argument is a constant rather than a column
 * @param columnCount         number of value arguments given to the function
 * @param constantColumns     indexes of value arguments that are constants instead of columns
 * @param nullConstantColumns indexes of constant arguments whose value is {@code null}
 * @param allowNulls          passed to the row source as its allow-null setting
 * @return {@code false} if the row-mode or vector-mode helper returns {@code false};
 *         {@code true} once all results have been compared
 * @throws Exception declared by this method; presumably propagated from the helpers it calls
 */
private boolean doCoalesceOnRandomDataType(Random random, int iteration, boolean isCoalesce, boolean isEltIndexConst, int columnCount, int[] constantColumns, int[] nullConstantColumns, boolean allowNulls) throws Exception {
    String typeName;
    if (isCoalesce) {
        // COALESCE: pick a random primitive type name, then decorate it.
        typeName = VectorRandomRowSource.getRandomTypeName(random, SupportedTypes.PRIMITIVES, /* allowedTypeNameSet */
        null);
        typeName = VectorRandomRowSource.getDecoratedTypeName(random, typeName, SupportedTypes.PRIMITIVES, /* allowedTypeNameSet */
        null, /* depth */
        0, /* maxDepth */
        2);
    } else {
        // ELT only choose between STRINGs.
        typeName = "string";
    }
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
    // ----------------------------------------------------------------------------------------------
    // The int type info / inspector describe the ELT index argument; unused for COALESCE.
    final TypeInfo intTypeInfo;
    ObjectInspector intObjectInspector;
    if (isCoalesce) {
        intTypeInfo = null;
        intObjectInspector = null;
    } else {
        intTypeInfo = TypeInfoFactory.intTypeInfo;
        intObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(intTypeInfo);
    }
    ObjectInspector objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
    // ----------------------------------------------------------------------------------------------
    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
    List<String> columns = new ArrayList<String>();
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    int columnNum = 1;
    if (!isCoalesce) {
        // ELT: the first child is the index. Candidate values run from -1 through columnCount + 1,
        // so they include values outside 1..columnCount.
        List<Object> intValueList = new ArrayList<Object>();
        for (int i = -1; i < columnCount + 2; i++) {
            intValueList.add(new IntWritable(i));
        }
        final int intValueListCount = intValueList.size();
        ExprNodeDesc intColExpr;
        if (!isEltIndexConst) {
            // Index comes from a generated column drawing from the candidate list.
            generationSpecList.add(GenerationSpec.createValueList(intTypeInfo, intValueList));
            explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
            String columnName = "col" + columnNum++;
            columns.add(columnName);
            intColExpr = new ExprNodeColumnDesc(intTypeInfo, columnName, "table", false);
        } else {
            // Constant index: a random candidate 9 times out of 10, otherwise null.
            final Object scalarObject;
            if (random.nextInt(10) != 0) {
                scalarObject = intValueList.get(random.nextInt(intValueListCount));
            } else {
                scalarObject = null;
            }
            // NOTE(review): the constant holds an IntWritable (or null) but is typed with typeInfo
            // ("string" for ELT) rather than intTypeInfo; confirm this is intended.
            intColExpr = new ExprNodeConstantDesc(typeInfo, scalarObject);
        }
        children.add(intColExpr);
    }
    // Value arguments: each is either a generated column or a constant (possibly null).
    for (int c = 0; c < columnCount; c++) {
        ExprNodeDesc colExpr;
        if (!contains(constantColumns, c)) {
            generationSpecList.add(GenerationSpec.createSameType(typeInfo));
            explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
            String columnName = "col" + columnNum++;
            columns.add(columnName);
            colExpr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
        } else {
            final Object scalarObject;
            if (!contains(nullConstantColumns, c)) {
                scalarObject = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo);
            } else {
                scalarObject = null;
            }
            colExpr = new ExprNodeConstantDesc(typeInfo, scalarObject);
        }
        children.add(colExpr);
    }
    // Generate the random input rows and the batches used by the vectorized modes.
    VectorRandomRowSource rowSource = new VectorRandomRowSource();
    rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */
    0, /* allowNull */
    allowNulls, /* isUnicodeOk */
    true, explicitDataTypePhysicalVariationList);
    String[] columnNames = columns.toArray(new String[0]);
    Object[][] randomRows = rowSource.randomRows(100000);
    VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
    final GenericUDF udf = (isCoalesce ? new GenericUDFCoalesce() : new GenericUDFElt());
    // For ELT, slot 0 of the argument inspectors is the int index; value arguments follow.
    final int start = isCoalesce ? 0 : 1;
    final int end = start + columnCount;
    ObjectInspector[] argumentOIs = new ObjectInspector[end];
    if (!isCoalesce) {
        argumentOIs[0] = intObjectInspector;
    }
    for (int i = start; i < end; i++) {
        argumentOIs[i] = objectInspector;
    }
    final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
    TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(typeInfo, udf, children);
    final int rowCount = randomRows.length;
    // One result array per test mode, indexed by the mode's position in CoalesceEltTestMode.values().
    Object[][] resultObjectsArray = new Object[CoalesceEltTestMode.count][];
    for (int i = 0; i < CoalesceEltTestMode.count; i++) {
        Object[] resultObjects = new Object[rowCount];
        resultObjectsArray[i] = resultObjects;
        CoalesceEltTestMode coalesceEltTestMode = CoalesceEltTestMode.values()[i];
        switch(coalesceEltTestMode) {
            case ROW_MODE:
                if (!doRowCastTest(typeInfo, columns, children, udf, exprDesc, randomRows, rowSource.rowStructObjectInspector(), exprDesc.getWritableObjectInspector(), resultObjects)) {
                    return false;
                }
                break;
            case ADAPTOR:
            case VECTOR_EXPRESSION:
                if (!doVectorCastTest(typeInfo, iteration, columns, columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), children, udf, exprDesc, coalesceEltTestMode, batchSource, exprDesc.getWritableObjectInspector(), outputTypeInfo, resultObjects)) {
                    return false;
                }
                break;
            default:
                // NOTE(review): the message mentions "IF statement" although this test covers
                // COALESCE/ELT; looks copied from another test.
                throw new RuntimeException("Unexpected IF statement test mode " + coalesceEltTestMode);
        }
    }
    // Compare every other mode's result against the result at index 0, row by row.
    for (int i = 0; i < rowCount; i++) {
        // Row-mode is the expected value.
        Object expectedResult = resultObjectsArray[0][i];
        for (int v = 1; v < CoalesceEltTestMode.count; v++) {
            Object vectorResult = resultObjectsArray[v][i];
            CoalesceEltTestMode coalesceEltTestMode = CoalesceEltTestMode.values()[v];
            if (expectedResult == null || vectorResult == null) {
                // At least one side is null: fail unless both are null.
                if (expectedResult != null || vectorResult != null) {
                    Assert.fail("Row " + i + " sourceTypeName " + typeName + " " + coalesceEltTestMode + " iteration " + iteration + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + " does not match row-mode expected result is NULL " + (expectedResult == null ? "YES" : "NO result '" + expectedResult.toString()) + "'" + " row values " + Arrays.toString(randomRows[i]) + " exprDesc " + exprDesc.toString());
                }
            } else {
                if (!expectedResult.equals(vectorResult)) {
                    Assert.fail("Row " + i + " sourceTypeName " + typeName + " " + coalesceEltTestMode + " iteration " + iteration + " result '" + vectorResult.toString() + "'" + " (" + vectorResult.getClass().getSimpleName() + ")" + " does not match row-mode expected result '" + expectedResult.toString() + "'" + " (" + expectedResult.getClass().getSimpleName() + ")" + " row values " + Arrays.toString(randomRows[i]) + " exprDesc " + exprDesc.toString());
                }
            }
        }
    }
    return true;
}
Also used : ArrayList(java.util.ArrayList) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IntWritable(org.apache.hadoop.io.IntWritable) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) VectorRandomBatchSource(org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenerationSpec(org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFCoalesce(org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce) GenericUDFElt(org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt) VectorRandomRowSource(org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)

Example 29 with IntObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.

the class LazyBinarySerDe method serialize.

/**
 * Recursively serializes an object into a byte stream, dispatching on the category of its
 * object inspector (primitive, list, map, struct or union).
 *
 * <p>A {@code null} object writes nothing. Lists, maps, structs and unions are preceded by a
 * reserved 4-byte size slot that is filled in after the content has been written, unless
 * {@code skipLengthPrefix} is set.
 *
 * @param byteStream
 *          the byte stream storing the serialization data
 * @param obj
 *          the object to serialize
 * @param objInspector
 *          the object inspector describing {@code obj}
 * @param skipLengthPrefix when {@code true}, the length prefix is not written for
 *          binary/list/map/struct/union values; the flag is also passed to
 *          {@code serializeText} for string/char/varchar values
 * @param warnedOnceNullMapKey a boolean indicating whether a warning
 *          has been issued once already when encountering null map keys; may be
 *          {@code null}, in which case no warning is logged
 * @throws SerDeException declared by this method; presumably propagated from the helper
 *          methods it calls
 * @throws RuntimeException if the category or primitive category is not recognized, or if a
 *          zero-length binary value is serialized with {@code skipLengthPrefix} set
 */
public static void serialize(RandomAccessOutput byteStream, Object obj, ObjectInspector objInspector, boolean skipLengthPrefix, BooleanRef warnedOnceNullMapKey) throws SerDeException {
    // do nothing for null object
    if (null == obj) {
        return;
    }
    switch(objInspector.getCategory()) {
        case PRIMITIVE:
            {
                // Every primitive case returns (or throws), so control never falls into LIST.
                PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
                switch(poi.getPrimitiveCategory()) {
                    case VOID:
                        {
                            return;
                        }
                    case BOOLEAN:
                        {
                            // one byte: 1 for true, 0 for false
                            boolean v = ((BooleanObjectInspector) poi).get(obj);
                            byteStream.write((byte) (v ? 1 : 0));
                            return;
                        }
                    case BYTE:
                        {
                            ByteObjectInspector boi = (ByteObjectInspector) poi;
                            byte v = boi.get(obj);
                            byteStream.write(v);
                            return;
                        }
                    case SHORT:
                        {
                            // two bytes, most significant byte first
                            ShortObjectInspector spoi = (ShortObjectInspector) poi;
                            short v = spoi.get(obj);
                            byteStream.write((byte) (v >> 8));
                            byteStream.write((byte) (v));
                            return;
                        }
                    case INT:
                        {
                            IntObjectInspector ioi = (IntObjectInspector) poi;
                            int v = ioi.get(obj);
                            LazyBinaryUtils.writeVInt(byteStream, v);
                            return;
                        }
                    case LONG:
                        {
                            LongObjectInspector loi = (LongObjectInspector) poi;
                            long v = loi.get(obj);
                            LazyBinaryUtils.writeVLong(byteStream, v);
                            return;
                        }
                    case FLOAT:
                        {
                            // four bytes of the float's int bit pattern, most significant byte first
                            FloatObjectInspector foi = (FloatObjectInspector) poi;
                            int v = Float.floatToIntBits(foi.get(obj));
                            byteStream.write((byte) (v >> 24));
                            byteStream.write((byte) (v >> 16));
                            byteStream.write((byte) (v >> 8));
                            byteStream.write((byte) (v));
                            return;
                        }
                    case DOUBLE:
                        {
                            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
                            LazyBinaryUtils.writeDouble(byteStream, doi.get(obj));
                            return;
                        }
                    case STRING:
                        {
                            StringObjectInspector soi = (StringObjectInspector) poi;
                            Text t = soi.getPrimitiveWritableObject(obj);
                            serializeText(byteStream, t, skipLengthPrefix);
                            return;
                        }
                    case CHAR:
                        {
                            // CHAR is written through its Text value, like STRING
                            HiveCharObjectInspector hcoi = (HiveCharObjectInspector) poi;
                            Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
                            serializeText(byteStream, t, skipLengthPrefix);
                            return;
                        }
                    case VARCHAR:
                        {
                            // VARCHAR is written through its Text value, like STRING
                            HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
                            Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
                            serializeText(byteStream, t, skipLengthPrefix);
                            return;
                        }
                    case BINARY:
                        {
                            BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
                            BytesWritable bw = baoi.getPrimitiveWritableObject(obj);
                            int length = bw.getLength();
                            if (!skipLengthPrefix) {
                                LazyBinaryUtils.writeVInt(byteStream, length);
                            } else {
                                // Without a length prefix, a zero-length value is rejected outright.
                                if (length == 0) {
                                    throw new RuntimeException("LazyBinaryColumnarSerde cannot serialize a non-null zero " + "length binary field. Consider using either LazyBinarySerde or ColumnarSerde.");
                                }
                            }
                            // Only the first 'length' bytes of the backing array are written.
                            byteStream.write(bw.getBytes(), 0, length);
                            return;
                        }
                    case DATE:
                        {
                            DateWritableV2 d = ((DateObjectInspector) poi).getPrimitiveWritableObject(obj);
                            writeDateToByteStream(byteStream, d);
                            return;
                        }
                    case TIMESTAMP:
                        {
                            TimestampObjectInspector toi = (TimestampObjectInspector) poi;
                            TimestampWritableV2 t = toi.getPrimitiveWritableObject(obj);
                            t.writeToByteStream(byteStream);
                            return;
                        }
                    case TIMESTAMPLOCALTZ:
                        {
                            TimestampLocalTZWritable t = ((TimestampLocalTZObjectInspector) poi).getPrimitiveWritableObject(obj);
                            t.writeToByteStream(byteStream);
                            return;
                        }
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonthWritable intervalYearMonth = ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(obj);
                            intervalYearMonth.writeToByteStream(byteStream);
                            return;
                        }
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTimeWritable intervalDayTime = ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(obj);
                            intervalDayTime.writeToByteStream(byteStream);
                            return;
                        }
                    case DECIMAL:
                        {
                            HiveDecimalObjectInspector bdoi = (HiveDecimalObjectInspector) poi;
                            HiveDecimalWritable t = bdoi.getPrimitiveWritableObject(obj);
                            // A null writable is treated like a null object: nothing is written.
                            if (t == null) {
                                return;
                            }
                            writeToByteStream(byteStream, t);
                            return;
                        }
                    default:
                        {
                            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
                        }
                }
            }
        case LIST:
            {
                ListObjectInspector loi = (ListObjectInspector) objInspector;
                ObjectInspector eoi = loi.getListElementObjectInspector();
                int byteSizeStart = 0;
                int listStart = 0;
                if (!skipLengthPrefix) {
                    // 1/ reserve spaces for the byte size of the list
                    // which is a integer and takes four bytes
                    byteSizeStart = byteStream.getLength();
                    byteStream.reserve(4);
                    listStart = byteStream.getLength();
                }
                // 2/ write the size of the list as a VInt
                int size = loi.getListLength(obj);
                LazyBinaryUtils.writeVInt(byteStream, size);
                // 3/ write the null bytes
                // One bit per element, eight elements per byte; a partially filled byte is
                // flushed after the last element.
                byte nullByte = 0;
                for (int eid = 0; eid < size; eid++) {
                    // set the bit to 1 if an element is not null
                    if (null != loi.getListElement(obj, eid)) {
                        nullByte |= 1 << (eid % 8);
                    }
                    // if this is the last element
                    if (7 == eid % 8 || eid == size - 1) {
                        byteStream.write(nullByte);
                        nullByte = 0;
                    }
                }
                // 4/ write element by element from the list
                // Nested elements always carry their own length prefix (skipLengthPrefix = false).
                for (int eid = 0; eid < size; eid++) {
                    serialize(byteStream, loi.getListElement(obj, eid), eoi, false, warnedOnceNullMapKey);
                }
                if (!skipLengthPrefix) {
                    // 5/ update the list byte size
                    int listEnd = byteStream.getLength();
                    int listSize = listEnd - listStart;
                    writeSizeAtOffset(byteStream, byteSizeStart, listSize);
                }
                return;
            }
        case MAP:
            {
                MapObjectInspector moi = (MapObjectInspector) objInspector;
                ObjectInspector koi = moi.getMapKeyObjectInspector();
                ObjectInspector voi = moi.getMapValueObjectInspector();
                Map<?, ?> map = moi.getMap(obj);
                int byteSizeStart = 0;
                int mapStart = 0;
                if (!skipLengthPrefix) {
                    // 1/ reserve spaces for the byte size of the map
                    // which is a integer and takes four bytes
                    byteSizeStart = byteStream.getLength();
                    byteStream.reserve(4);
                    mapStart = byteStream.getLength();
                }
                // 2/ write the size of the map which is a VInt
                int size = map.size();
                LazyBinaryUtils.writeVInt(byteStream, size);
                // 3/ write the null bytes
                // Two bits per entry (key bit, then value bit), so each byte covers four entries;
                // b counts the bits emitted so far.
                int b = 0;
                byte nullByte = 0;
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    // set the bit to 1 if a key is not null
                    if (null != entry.getKey()) {
                        nullByte |= 1 << (b % 8);
                    } else if (warnedOnceNullMapKey != null) {
                        // Log the null-key warning only the first time, then remember it was issued.
                        if (!warnedOnceNullMapKey.value) {
                            LOG.warn("Null map key encountered! Ignoring similar problems.");
                        }
                        warnedOnceNullMapKey.value = true;
                    }
                    b++;
                    // set the bit to 1 if a value is not null
                    if (null != entry.getValue()) {
                        nullByte |= 1 << (b % 8);
                    }
                    b++;
                    // or if this is the last key-value pair
                    if (0 == b % 8 || b == size * 2) {
                        byteStream.write(nullByte);
                        nullByte = 0;
                    }
                }
                // 4/ write key-value pairs one by one
                // Keys and values always carry their own length prefix (skipLengthPrefix = false).
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    serialize(byteStream, entry.getKey(), koi, false, warnedOnceNullMapKey);
                    serialize(byteStream, entry.getValue(), voi, false, warnedOnceNullMapKey);
                }
                if (!skipLengthPrefix) {
                    // 5/ update the byte size of the map
                    int mapEnd = byteStream.getLength();
                    int mapSize = mapEnd - mapStart;
                    writeSizeAtOffset(byteStream, byteSizeStart, mapSize);
                }
                return;
            }
        case STRUCT:
        case UNION:
            {
                // Structs and unions share the size-prefix handling; only the body writer differs.
                int byteSizeStart = 0;
                int typeStart = 0;
                if (!skipLengthPrefix) {
                    // 1/ reserve spaces for the byte size of the struct
                    // which is a integer and takes four bytes
                    byteSizeStart = byteStream.getLength();
                    byteStream.reserve(4);
                    typeStart = byteStream.getLength();
                }
                if (ObjectInspector.Category.STRUCT.equals(objInspector.getCategory())) {
                    // 2/ serialize the struct
                    serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey);
                } else {
                    // 2/ serialize the union
                    serializeUnion(byteStream, obj, (UnionObjectInspector) objInspector, warnedOnceNullMapKey);
                }
                if (!skipLengthPrefix) {
                    // 3/ update the byte size of the struct
                    int typeEnd = byteStream.getLength();
                    int typeSize = typeEnd - typeStart;
                    writeSizeAtOffset(byteStream, byteSizeStart, typeSize);
                }
                return;
            }
        default:
            {
                throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
            }
    }
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 
MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) 
TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) TimestampLocalTZWritable(org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 30 with IntObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.

the class DataWritableWriter method createWriter.

/**
 * Selects the {@code DataWriter} implementation that matches a Parquet schema type
 * and the Hive object inspector describing the values to be written.
 *
 * <p>Non-primitive (group) types are dispatched on their logical type annotation:
 * a list annotation yields a list writer, a map annotation yields a map writer, and
 * anything else (including no annotation at all) is treated as a struct. Primitive
 * types are dispatched on the inspector's primitive category.
 *
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the type schema.
 * @return A DataWriter used to call the Parquet API for the specific data type.
 * @throws IllegalArgumentException if the inspector's primitive category has no
 *         corresponding writer in this method.
 */
private DataWriter createWriter(ObjectInspector inspector, Type type) {
    // Complex (group) types first; each branch returns, leaving only primitives below.
    if (!type.isPrimitive()) {
        GroupType group = type.asGroupType();
        LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation();

        // instanceof evaluates to false for null, so no separate null check is needed.
        if (annotation instanceof ListLogicalTypeAnnotation) {
            checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
            return new ListDataWriter((ListObjectInspector) inspector, group);
        }
        if (annotation instanceof MapLogicalTypeAnnotation) {
            checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
            return new MapDataWriter((MapObjectInspector) inspector, group);
        }

        // Groups that are neither list nor map are written as structs.
        checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
        return new StructDataWriter((StructObjectInspector) inspector, group);
    }

    // NOTE(review): checkInspectorCategory presumably rejects an inspector whose
    // category differs from the expected one; it runs before the cast either way.
    checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
    PrimitiveObjectInspector primitive = (PrimitiveObjectInspector) inspector;
    switch (primitive.getPrimitiveCategory()) {
        case BOOLEAN:
            return new BooleanDataWriter((BooleanObjectInspector) inspector);
        case BYTE:
            return new ByteDataWriter((ByteObjectInspector) inspector);
        case SHORT:
            return new ShortDataWriter((ShortObjectInspector) inspector);
        case INT:
            return new IntDataWriter((IntObjectInspector) inspector);
        case LONG:
            return new LongDataWriter((LongObjectInspector) inspector);
        case FLOAT:
            return new FloatDataWriter((FloatObjectInspector) inspector);
        case DOUBLE:
            return new DoubleDataWriter((DoubleObjectInspector) inspector);
        case STRING:
            return new StringDataWriter((StringObjectInspector) inspector);
        case CHAR:
            return new CharDataWriter((HiveCharObjectInspector) inspector);
        case VARCHAR:
            return new VarcharDataWriter((HiveVarcharObjectInspector) inspector);
        case BINARY:
            return new BinaryDataWriter((BinaryObjectInspector) inspector);
        case TIMESTAMP:
            return new TimestampDataWriter((TimestampObjectInspector) inspector);
        case DECIMAL:
            return new DecimalDataWriter((HiveDecimalObjectInspector) inspector);
        case DATE:
            return new DateDataWriter((DateObjectInspector) inspector);
        default:
            // Any category without a dedicated writer above is rejected explicitly.
            throw new IllegalArgumentException("Unsupported primitive data type: " + primitive.getPrimitiveCategory());
    }
}
Also used : MapLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) ListLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) MapLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation) GroupType(org.apache.parquet.schema.GroupType) ListLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)26 ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector)18 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)18 DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector)17 FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)17 ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector)17 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)17 TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector)17 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)16 BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)16 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)15 HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector)15 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)14 DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)13 HiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector)13 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)12 HiveVarcharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector)12 Text (org.apache.hadoop.io.Text)10 Map (java.util.Map)9 ArrayList (java.util.ArrayList)8