
Example 16 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project presto by prestodb.

From the class TestDataWritableWriter, the method writePrimitive:

/**
 * It writes the primitive value to the Parquet RecordConsumer.
 *
 * @param value The object that contains the primitive value.
 * @param inspector The object inspector used to get the correct value type.
 */
private void writePrimitive(final Object value, final PrimitiveObjectInspector inspector) {
    if (value == null) {
        return;
    }
    switch(inspector.getPrimitiveCategory()) {
        case VOID:
            return;
        case DOUBLE:
            recordConsumer.addDouble(((DoubleObjectInspector) inspector).get(value));
            break;
        case BOOLEAN:
            recordConsumer.addBoolean(((BooleanObjectInspector) inspector).get(value));
            break;
        case FLOAT:
            recordConsumer.addFloat(((FloatObjectInspector) inspector).get(value));
            break;
        case BYTE:
            recordConsumer.addInteger(((ByteObjectInspector) inspector).get(value));
            break;
        case INT:
            recordConsumer.addInteger(((IntObjectInspector) inspector).get(value));
            break;
        case LONG:
            recordConsumer.addLong(((LongObjectInspector) inspector).get(value));
            break;
        case SHORT:
            recordConsumer.addInteger(((ShortObjectInspector) inspector).get(value));
            break;
        case STRING:
            String v = ((StringObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addBinary(Binary.fromString(v));
            break;
        case CHAR:
            String vChar = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(value).getStrippedValue();
            recordConsumer.addBinary(Binary.fromString(vChar));
            break;
        case VARCHAR:
            String vVarchar = ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(value).getValue();
            recordConsumer.addBinary(Binary.fromString(vVarchar));
            break;
        case BINARY:
            byte[] vBinary = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addBinary(Binary.fromByteArray(vBinary));
            break;
        case TIMESTAMP:
            Timestamp ts = ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addBinary(NanoTimeUtils.getNanoTime(ts, false).toBinary());
            break;
        case DECIMAL:
            HiveDecimal vDecimal = ((HiveDecimal) inspector.getPrimitiveJavaObject(value));
            DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) inspector.getTypeInfo();
            recordConsumer.addBinary(decimalToBinary(vDecimal, decTypeInfo));
            break;
        case DATE:
            Date vDate = ((DateObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addInteger(DateWritable.dateToDays(vDate));
            break;
        default:
            throw new IllegalArgumentException("Unsupported primitive data type: " + inspector.getPrimitiveCategory());
    }
}
Also used:
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)
DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)
TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector)
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)
Timestamp (java.sql.Timestamp)
Date (java.sql.Date)
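
To see the same dispatch in isolation, here is a minimal sketch that drives a writePrimitive-style switch with inspectors from Hive's PrimitiveObjectInspectorFactory and prints values instead of feeding a Parquet RecordConsumer. The class name PrimitiveDispatchSketch and the output format are ours, not part of presto.

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

public class PrimitiveDispatchSketch {
    // Mirrors the switch in writePrimitive, reduced to two categories.
    static void describe(Object value, PrimitiveObjectInspector inspector) {
        if (value == null) {
            return; // null values are skipped, just as writePrimitive does
        }
        switch (inspector.getPrimitiveCategory()) {
            case INT:
                System.out.println("int: " + ((IntObjectInspector) inspector).get(value));
                break;
            case STRING:
                System.out.println("string: " + ((StringObjectInspector) inspector).getPrimitiveJavaObject(value));
                break;
            default:
                throw new IllegalArgumentException("Unsupported primitive data type: " + inspector.getPrimitiveCategory());
        }
    }

    public static void main(String[] args) {
        // Java-object inspectors interpret plain Java values directly.
        describe(42, PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        describe("hello", PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }
}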

Example 17 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project presto by prestodb.

From the class HiveType, the method getTypeSignature:

private static TypeSignature getTypeSignature(TypeInfo typeInfo) {
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            Type primitiveType = getPrimitiveType((PrimitiveTypeInfo) typeInfo);
            if (primitiveType == null) {
                break;
            }
            return primitiveType.getTypeSignature();
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            TypeSignature keyType = getTypeSignature(mapTypeInfo.getMapKeyTypeInfo());
            TypeSignature valueType = getTypeSignature(mapTypeInfo.getMapValueTypeInfo());
            return new TypeSignature(StandardTypes.MAP, ImmutableList.of(TypeSignatureParameter.of(keyType), TypeSignatureParameter.of(valueType)));
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            TypeSignature elementType = getTypeSignature(listTypeInfo.getListElementTypeInfo());
            return new TypeSignature(StandardTypes.ARRAY, ImmutableList.of(TypeSignatureParameter.of(elementType)));
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<TypeInfo> structFieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            List<String> structFieldNames = structTypeInfo.getAllStructFieldNames();
            if (structFieldTypeInfos.size() != structFieldNames.size()) {
                throw new PrestoException(HiveErrorCode.HIVE_INVALID_METADATA, format("Invalid Hive struct type: %s", typeInfo));
            }
            ImmutableList.Builder<TypeSignatureParameter> typeSignatureBuilder = ImmutableList.builder();
            for (int i = 0; i < structFieldTypeInfos.size(); i++) {
                TypeSignature typeSignature = getTypeSignature(structFieldTypeInfos.get(i));
                // Lower case the struct field names.
                // Otherwise, Presto will refuse to write to columns whose struct type has field names containing upper case characters.
                // Users can't work around this by casting in their queries, because the Presto parser always lower-cases types.
                // TODO: This is a hack. Presto engine should be able to handle identifiers in a case insensitive way where necessary.
                String rowFieldName = structFieldNames.get(i).toLowerCase(Locale.US);
                typeSignatureBuilder.add(TypeSignatureParameter.of(new NamedTypeSignature(Optional.of(new RowFieldName(rowFieldName, false)), typeSignature)));
            }
            return new TypeSignature(StandardTypes.ROW, typeSignatureBuilder.build());
    }
    throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s", typeInfo));
}
Also used:
ImmutableList (com.google.common.collect.ImmutableList)
RowFieldName (com.facebook.presto.common.type.RowFieldName)
NamedTypeSignature (com.facebook.presto.common.type.NamedTypeSignature)
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)
PrestoException (com.facebook.presto.spi.PrestoException)
TypeInfoUtils.getTypeInfosFromTypeString (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString)
TypeInfoUtils.getTypeInfoFromTypeString (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString)
TypeInfoFactory.doubleTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.doubleTypeInfo)
TypeInfoFactory.dateTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.dateTypeInfo)
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
TypeInfoFactory.longTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.longTypeInfo)
TypeInfoFactory.shortTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.shortTypeInfo)
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)
TypeInfoFactory.timestampTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.timestampTypeInfo)
TypeInfoFactory.floatTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.floatTypeInfo)
TypeInfoFactory.intTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.intTypeInfo)
TypeInfoFactory.binaryTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.binaryTypeInfo)
TypeInfoFactory.byteTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.byteTypeInfo)
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)
TypeInfoFactory.booleanTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.booleanTypeInfo)
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
TypeInfoFactory.stringTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo)
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)
CharType.createCharType (com.facebook.presto.common.type.CharType.createCharType)
VarcharType.createUnboundedVarcharType (com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType)
VarcharType.createVarcharType (com.facebook.presto.common.type.VarcharType.createVarcharType)
Type (com.facebook.presto.common.type.Type)
DecimalType.createDecimalType (com.facebook.presto.common.type.DecimalType.createDecimalType)
TypeSignature (com.facebook.presto.common.type.TypeSignature)
TypeSignatureParameter (com.facebook.presto.common.type.TypeSignatureParameter)
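
The Category switch generalizes to any parsed Hive type. Here is a minimal sketch (class and method names are ours) that builds a TypeInfo from a Hive type string via TypeInfoUtils.getTypeInfoFromTypeString and walks it with the same PRIMITIVE/MAP/LIST dispatch, rendering a signature-like string:

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeSignatureWalkSketch {
    // Recursively renders a Hive TypeInfo, following the same Category switch
    // that getTypeSignature uses to map Hive types onto Presto signatures.
    static String render(TypeInfo typeInfo) {
        switch (typeInfo.getCategory()) {
            case PRIMITIVE:
                return typeInfo.getTypeName();
            case MAP:
                MapTypeInfo map = (MapTypeInfo) typeInfo;
                return "map(" + render(map.getMapKeyTypeInfo()) + ", " + render(map.getMapValueTypeInfo()) + ")";
            case LIST:
                return "array(" + render(((ListTypeInfo) typeInfo).getListElementTypeInfo()) + ")";
            default:
                throw new IllegalArgumentException("Unsupported Hive type: " + typeInfo);
        }
    }

    public static void main(String[] args) {
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<int>>");
        System.out.println(render(typeInfo)); // map(string, array(int))
    }
}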

Example 18 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project presto by prestodb.

From the class ThriftMetastoreUtil, the method createMetastoreColumnStatistics:

public static ColumnStatisticsObj createMetastoreColumnStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount) {
    TypeInfo typeInfo = columnType.getTypeInfo();
    checkArgument(typeInfo.getCategory() == PRIMITIVE, "unsupported type: %s", columnType);
    switch(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
        case BOOLEAN:
            return createBooleanStatistics(columnName, columnType, statistics);
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            return createLongStatistics(columnName, columnType, statistics);
        case FLOAT:
        case DOUBLE:
            return createDoubleStatistics(columnName, columnType, statistics);
        case STRING:
        case VARCHAR:
        case CHAR:
            return createStringStatistics(columnName, columnType, statistics, rowCount);
        case DATE:
            return createDateStatistics(columnName, columnType, statistics);
        case TIMESTAMP:
            return createLongStatistics(columnName, columnType, statistics);
        case BINARY:
            return createBinaryStatistics(columnName, columnType, statistics, rowCount);
        case DECIMAL:
            return createDecimalStatistics(columnName, columnType, statistics);
        default:
            throw new IllegalArgumentException(format("unsupported type: %s", columnType));
    }
}
Also used:
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
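
The method is a guard on Category.PRIMITIVE followed by a switch that maps several primitive categories onto one statistics representation. A minimal sketch of that grouping; the class name and the returned kind strings are our own labels, not presto API:

import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE;

import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StatisticsKindSketch {
    // Groups primitive categories the way createMetastoreColumnStatistics does:
    // several Hive types share one statistics representation.
    static String statisticsKind(TypeInfo typeInfo) {
        if (typeInfo.getCategory() != PRIMITIVE) {
            throw new IllegalArgumentException("unsupported type: " + typeInfo);
        }
        switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
            case BOOLEAN:
                return "boolean";
            case BYTE:
            case SHORT:
            case INT:
            case LONG:
            case TIMESTAMP:
                return "long";
            case FLOAT:
            case DOUBLE:
                return "double";
            case STRING:
            case VARCHAR:
            case CHAR:
                return "string";
            default:
                return "other";
        }
    }

    public static void main(String[] args) {
        System.out.println(statisticsKind(TypeInfoUtils.getTypeInfoFromTypeString("smallint"))); // long
    }
}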

Example 19 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project flink by apache.

From the class HiveParserRexNodeConverter, the method convertGenericFunc:

private RexNode convertGenericFunc(ExprNodeGenericFuncDesc func) throws SemanticException {
    ExprNodeDesc tmpExprNode;
    RexNode tmpRN;
    List<RexNode> childRexNodeLst = new ArrayList<>();
    List<RelDataType> argTypes = new ArrayList<>();
    // TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
    TypeInfo tgtDT = null;
    GenericUDF tgtUdf = func.getGenericUDF();
    if (tgtUdf instanceof GenericUDFIn) {
        return convertIN(func);
    }
    boolean isNumeric = isNumericBinary(func);
    boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
    boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
    boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && func.getChildren().size() != 0;
    if (isNumeric) {
        tgtDT = func.getTypeInfo();
        assert func.getChildren().size() == 2;
    // TODO: checking 2 children is useless, compare already does that.
    } else if (isCompare && (func.getChildren().size() == 2)) {
        tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
    } else if (isWhenCase) {
        // stateful functions are not allowed inside CASE/WHEN, so reject them here
        if (checkForStatefulFunctions(func.getChildren())) {
            throw new SemanticException("Stateful expressions cannot be used inside of CASE");
        }
    } else if (isTransformableTimeStamp) {
        func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren());
    }
    for (ExprNodeDesc childExpr : func.getChildren()) {
        tmpExprNode = childExpr;
        if (tgtDT != null && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
            if (isCompare) {
                // For compare, we will convert requisite children
                tmpExprNode = HiveASTParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
            } else if (isNumeric) {
                // For numeric, we'll do minimum necessary cast - if we cast to the type
                // of expression, bad things will happen.
                PrimitiveTypeInfo minArgType = HiveParserExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
                tmpExprNode = HiveASTParseUtils.createConversionCast(childExpr, minArgType);
            } else {
                throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
            }
        }
        argTypes.add(HiveParserTypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
        tmpRN = convert(tmpExprNode);
        childRexNodeLst.add(tmpRN);
    }
    // process the function
    RelDataType retType = HiveParserTypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
    SqlOperator calciteOp = HiveParserSqlFunctionConverter.getCalciteOperator(func.getFuncText(), func.getGenericUDF(), argTypes, retType);
    if (calciteOp.getKind() == SqlKind.CASE) {
        // If it is a case operator, we need to rewrite it
        childRexNodeLst = rewriteCaseChildren(func, childRexNodeLst);
    }
    RexNode expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
    // check whether we need a calcite cast
    RexNode cast = handleExplicitCast(func, childRexNodeLst, ((RexCall) expr).getOperator());
    if (cast != null) {
        expr = cast;
        retType = cast.getType();
    }
    // flatten nested calls of the same operator, except for casts: flattening a Calcite cast call can throw
    // an exception
    if (flattenExpr && expr instanceof RexCall && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
        RexCall call = (RexCall) expr;
        expr = cluster.getRexBuilder().makeCall(retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator()));
    }
    return expr;
}
Also used:
GenericUDFCase (org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase)
SqlCastFunction (org.apache.calcite.sql.fun.SqlCastFunction)
SqlOperator (org.apache.calcite.sql.SqlOperator)
GenericUDFWhen (org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen)
ArrayList (java.util.ArrayList)
GenericUDFToUnixTimeStamp (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp)
RelDataType (org.apache.calcite.rel.type.RelDataType)
GenericUDFUnixTimeStamp (org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp)
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
RexCall (org.apache.calcite.rex.RexCall)
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)
GenericUDFBaseCompare (org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare)
GenericUDFIn (org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
RexNode (org.apache.calcite.rex.RexNode)
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
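
The comparison branch hinges on two Hive calls: FunctionRegistry.getCommonClassForComparison picks the target type for both sides, and TypeInfoUtils.isConversionRequiredForComparison decides whether each child needs a cast. A small standalone sketch; the expected outputs in the comments reflect Hive's usual numeric promotion rules and should be verified against your Hive version:

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ComparisonCastSketch {
    public static void main(String[] args) {
        TypeInfo left = TypeInfoFactory.intTypeInfo;
        TypeInfo right = TypeInfoFactory.doubleTypeInfo;

        // The common type both sides of `int_col = double_col` are compared as.
        TypeInfo common = FunctionRegistry.getCommonClassForComparison(left, right);
        System.out.println(common.getTypeName()); // expected: double

        // The converter only inserts a cast when the child type differs from the target.
        System.out.println(TypeInfoUtils.isConversionRequiredForComparison(common, left)); // expected: true
        System.out.println(TypeInfoUtils.isConversionRequiredForComparison(common, right)); // expected: false
    }
}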

Example 20 with PRIMITIVE

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project flink by apache.

From the class HiveParserUtils, the method toRelDataType:

// converts a Hive TypeInfo to a Calcite RelDataType
public static RelDataType toRelDataType(TypeInfo typeInfo, RelDataTypeFactory relTypeFactory) throws SemanticException {
    RelDataType res;
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            // hive sets NULLABLE for all primitive types, revert that
            res = HiveParserTypeConverter.convert(typeInfo, relTypeFactory);
            return relTypeFactory.createTypeWithNullability(res, false);
        case LIST:
            RelDataType elementType = toRelDataType(((ListTypeInfo) typeInfo).getListElementTypeInfo(), relTypeFactory);
            return relTypeFactory.createArrayType(elementType, -1);
        case MAP:
            RelDataType keyType = toRelDataType(((MapTypeInfo) typeInfo).getMapKeyTypeInfo(), relTypeFactory);
            RelDataType valType = toRelDataType(((MapTypeInfo) typeInfo).getMapValueTypeInfo(), relTypeFactory);
            return relTypeFactory.createMapType(keyType, valType);
        case STRUCT:
            List<TypeInfo> types = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
            List<RelDataType> convertedTypes = new ArrayList<>(types.size());
            for (TypeInfo type : types) {
                convertedTypes.add(toRelDataType(type, relTypeFactory));
            }
            return relTypeFactory.createStructType(convertedTypes, ((StructTypeInfo) typeInfo).getAllStructFieldNames());
        case UNION:
        default:
            throw new SemanticException(String.format("%s type is not supported yet", typeInfo.getCategory().name()));
    }
}
Also used:
ArrayList (java.util.ArrayList)
RelDataType (org.apache.calcite.rel.type.RelDataType)
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
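
The same recursive Category dispatch underlies any TypeInfo traversal. As an illustration (class and method names are ours, not Flink API), this sketch collects the primitive leaf type names of a nested struct:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructWalkSketch {
    // Collects the primitive leaf type names of a (possibly nested) struct,
    // using the same recursive Category dispatch as toRelDataType.
    static void collectLeaves(TypeInfo typeInfo, List<String> leaves) {
        switch (typeInfo.getCategory()) {
            case PRIMITIVE:
                leaves.add(typeInfo.getTypeName());
                return;
            case STRUCT:
                for (TypeInfo field : ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos()) {
                    collectLeaves(field, leaves);
                }
                return;
            default:
                throw new IllegalArgumentException(typeInfo.getCategory() + " type is not supported yet");
        }
    }

    public static void main(String[] args) {
        TypeInfo struct = TypeInfoUtils.getTypeInfoFromTypeString("struct<a:int,b:struct<c:string,d:double>>");
        List<String> leaves = new ArrayList<>();
        collectLeaves(struct, leaves);
        System.out.println(leaves); // [int, string, double]
    }
}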

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 83
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 75
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 74
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 64
ArrayList (java.util.ArrayList): 54
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 52
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 47
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 46
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 45
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 44
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 43
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 38
BytesWritable (org.apache.hadoop.io.BytesWritable): 36
Text (org.apache.hadoop.io.Text): 35
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 34
List (java.util.List): 30
Map (java.util.Map): 30
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 30
UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo): 30
UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException): 27