
Example 1 with BaseCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project presto-hive-apache by prestodb.

From the class JsonSerDe, the method extractCurrentField:

/**
 * Utility method to extract the current expected field from the given JsonParser.
 *
 * The isTokenCurrent flag indicates whether the JsonParser is already
 * positioned at the token we expect to read next, or whether it must be
 * advanced to the next token before reading.
 */
private Object extractCurrentField(JsonParser p, HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException {
    Object val = null;
    JsonToken valueToken;
    if (isTokenCurrent) {
        valueToken = p.getCurrentToken();
    } else {
        valueToken = p.nextToken();
    }
    switch(hcatFieldSchema.getType()) {
        case INT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue();
            break;
        case TINYINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue();
            break;
        case SMALLINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue();
            break;
        case BIGINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue();
            break;
        case BOOLEAN:
            String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            val = (bval == null) ? null : Boolean.valueOf(bval);
            break;
        case FLOAT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue();
            break;
        case DOUBLE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue();
            break;
        case STRING:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            break;
        case BINARY:
            throw new IOException("JsonSerDe does not support BINARY type");
        case DATE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : Date.valueOf(p.getText());
            break;
        case TIMESTAMP:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : tsParser.parseTimestamp(p.getText());
            break;
        case DECIMAL:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : HiveDecimal.create(p.getText());
            break;
        case VARCHAR:
            int vLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveVarchar(p.getText(), vLen);
            break;
        case CHAR:
            int cLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveChar(p.getText(), cLen);
            break;
        case ARRAY:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_ARRAY) {
                throw new IOException("Start of Array expected");
            }
            List<Object> arr = new ArrayList<Object>();
            while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
                arr.add(extractCurrentField(p, hcatFieldSchema.getArrayElementSchema().get(0), true));
            }
            val = arr;
            break;
        case MAP:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            Map<Object, Object> map = new LinkedHashMap<Object, Object>();
            HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), hcatFieldSchema.getMapKeyTypeInfo());
                Object v = extractCurrentField(p, valueSchema, false);
                map.put(k, v);
            }
            val = map;
            break;
        case STRUCT:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
            int sz = subSchema.getFieldNames().size();
            List<Object> struct = new ArrayList<Object>(Collections.nCopies(sz, null));
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                populateRecord(struct, valueToken, p, subSchema);
            }
            val = struct;
            break;
        default:
            LOG.error("Unknown type found: " + hcatFieldSchema.getType());
            return null;
    }
    return val;
}
Also used: BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo), HiveChar (org.apache.hadoop.hive.common.type.HiveChar), ArrayList (java.util.ArrayList), IOException (java.io.IOException), HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar), LinkedHashMap (java.util.LinkedHashMap), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), JsonToken (org.codehaus.jackson.JsonToken)
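
The VARCHAR and CHAR branches are the only places this method consults BaseCharTypeInfo: they read the declared length and let HiveVarchar or HiveChar enforce it. Below is a minimal, self-contained sketch of that pattern, using the same Jackson 1.x parser as the SerDe; the class name and the literal length are illustrative, not part of the Hive sources.

import java.io.IOException;

import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;

public class VarcharFieldSketch {
    public static void main(String[] args) throws IOException {
        // varchar(5): the declared length lives on BaseCharTypeInfo, exactly
        // as in the VARCHAR case of extractCurrentField above.
        BaseCharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(5);

        JsonParser p = new JsonFactory().createJsonParser("\"hello world\"");
        JsonToken valueToken = p.nextToken();

        // Null check first, then let HiveVarchar truncate to the declared length.
        Object val = (valueToken == JsonToken.VALUE_NULL)
                ? null
                : new HiveVarchar(p.getText(), typeInfo.getLength());

        System.out.println(val); // prints "hello": truncated to varchar(5)
    }
}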

Example 2 with BaseCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project flink by apache.

From the class HiveParserTypeConverter, the method convert:

public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {
    RelDataType convertedType = null;
    HiveShim hiveShim = HiveParserUtils.getSessionHiveShim();
    switch(type.getPrimitiveCategory()) {
        case VOID:
            convertedType = dtFactory.createSqlType(SqlTypeName.NULL);
            break;
        case BOOLEAN:
            convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN);
            break;
        case BYTE:
            convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT);
            break;
        case SHORT:
            convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT);
            break;
        case INT:
            convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER);
            break;
        case LONG:
            convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT);
            break;
        case FLOAT:
            convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT);
            break;
        case DOUBLE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE);
            break;
        case STRING:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case DATE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DATE);
            break;
        case TIMESTAMP:
            convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP, 9);
            break;
        case BINARY:
            convertedType = dtFactory.createSqlType(SqlTypeName.BINARY);
            break;
        case DECIMAL:
            DecimalTypeInfo dtInf = (DecimalTypeInfo) type;
            convertedType = dtFactory.createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale());
            break;
        case VARCHAR:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.VARCHAR, ((BaseCharTypeInfo) type).getLength()), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case CHAR:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.CHAR, ((BaseCharTypeInfo) type).getLength()), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case UNKNOWN:
            convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
            break;
        default:
            if (hiveShim.isIntervalYearMonthType(type.getPrimitiveCategory())) {
                convertedType = dtFactory.createSqlIntervalType(new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
            } else if (hiveShim.isIntervalDayTimeType(type.getPrimitiveCategory())) {
                convertedType = dtFactory.createSqlIntervalType(new SqlIntervalQualifier(TimeUnit.DAY, TimeUnit.SECOND, new SqlParserPos(1, 1)));
            }
    }
    if (null == convertedType) {
        throw new RuntimeException("Unsupported Type : " + type.getTypeName());
    }
    return dtFactory.createTypeWithNullability(convertedType, true);
}
Also used: DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo), SqlParserPos (org.apache.calcite.sql.parser.SqlParserPos), SqlIntervalQualifier (org.apache.calcite.sql.SqlIntervalQualifier), RelDataType (org.apache.calcite.rel.type.RelDataType), HiveShim (org.apache.flink.table.catalog.hive.client.HiveShim)
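
In the VARCHAR and CHAR branches, the length stored on BaseCharTypeInfo flows straight into the Calcite type. A standalone sketch of the VARCHAR branch, assuming Calcite's default type system (the driver class name is illustrative):

import java.nio.charset.Charset;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.SqlCollation;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.ConversionUtil;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class VarcharConversionSketch {
    public static void main(String[] args) {
        RelDataTypeFactory dtFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);

        // The declared length travels from the Hive type info into the Calcite type.
        BaseCharTypeInfo hiveType = TypeInfoFactory.getVarcharTypeInfo(10);
        RelDataType converted = dtFactory.createTypeWithCharsetAndCollation(
                dtFactory.createSqlType(SqlTypeName.VARCHAR, hiveType.getLength()),
                Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME),
                SqlCollation.IMPLICIT);

        // prints something like VARCHAR(10) CHARACTER SET "UTF-16LE"
        System.out.println(converted.getFullTypeString());
    }
}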

Example 3 with BaseCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.

From the class VectorizationContext, the method instantiateExpression:

public VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnTypeInfo, DataTypePhysicalVariation returnDataTypePhysicalVariation, Object... args) throws HiveException {
    VectorExpression ve = null;
    Constructor<?> ctor = getConstructor(vclass);
    int numParams = ctor.getParameterTypes().length;
    int argsLength = (args == null) ? 0 : args.length;
    if (numParams == 0) {
        try {
            ve = (VectorExpression) ctor.newInstance();
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + getStackTraceAsSingleLine(ex));
        }
    } else if (numParams == argsLength) {
        try {
            ve = (VectorExpression) ctor.newInstance(args);
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + getStackTraceAsSingleLine(ex));
        }
    } else if (numParams == argsLength + 1) {
        // An additional argument is needed: the output column.
        Object[] newArgs = null;
        try {
            if (returnTypeInfo == null) {
                throw new HiveException("Missing output type information");
            }
            String returnTypeName = returnTypeInfo.getTypeName();
            // Special handling is needed for decimal because decimal types require scale and precision parameters.
            // This special handling should be avoided by using returnType uniformly for all cases.
            final int outputColumnNum = ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation);
            newArgs = Arrays.copyOf(Objects.requireNonNull(args), numParams);
            newArgs[numParams - 1] = outputColumnNum;
            ve = (VectorExpression) ctor.newInstance(newArgs);
            /*
             * Caller is responsible for setting children and input type information.
             */
            ve.setOutputTypeInfo(returnTypeInfo);
            ve.setOutputDataTypePhysicalVariation(returnDataTypePhysicalVariation);
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + getStackTraceAsSingleLine(ex));
        }
    }
    // Add maxLength parameter to UDFs that have CHAR or VARCHAR output.
    if (ve instanceof TruncStringOutput) {
        TruncStringOutput truncStringOutput = (TruncStringOutput) ve;
        if (returnTypeInfo instanceof BaseCharTypeInfo) {
            BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnTypeInfo;
            truncStringOutput.setMaxLength(baseCharTypeInfo.getLength());
        }
    }
    return ve;
}
Also used: HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), TruncStringOutput (org.apache.hadoop.hive.ql.exec.vector.expressions.TruncStringOutput), BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo), FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression), ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression), CastDecimalToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString), CastLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString), CastFloatToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString), CastDateToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString), CastTimestampToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString), CastDoubleToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString), CastBooleanToStringViaLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
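
The interesting branch above is numParams == argsLength + 1: the constructor is invoked reflectively with a freshly allocated output column appended as the last argument. A toy sketch of just that mechanism; ToyExpression and the literal column numbers are hypothetical stand-ins, not Hive classes.

import java.lang.reflect.Constructor;
import java.util.Arrays;

public class InstantiationSketch {

    // Hypothetical stand-in for a generated VectorExpression whose last
    // constructor parameter is the output column number.
    public static class ToyExpression {
        final int inputColumn;
        final int outputColumn;
        public ToyExpression(int inputColumn, int outputColumn) {
            this.inputColumn = inputColumn;
            this.outputColumn = outputColumn;
        }
    }

    public static void main(String[] args) throws Exception {
        Constructor<?> ctor = ToyExpression.class.getConstructors()[0];
        int numParams = ctor.getParameterTypes().length; // 2

        // The caller supplies one argument fewer than the constructor declares ...
        Object[] callerArgs = { 7 };

        // ... so, as in instantiateExpression, widen the array and append the
        // allocated output column number (a stand-in for ocm.allocateOutputColumn).
        int outputColumnNum = 3;
        Object[] newArgs = Arrays.copyOf(callerArgs, numParams);
        newArgs[numParams - 1] = outputColumnNum;

        ToyExpression ve = (ToyExpression) ctor.newInstance(newArgs);
        System.out.println(ve.inputColumn + " -> " + ve.outputColumn); // 7 -> 3
    }
}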

Example 4 with BaseCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.

From the class TypeConverter, the method convert:

public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {
    RelDataType convertedType = null;
    switch(type.getPrimitiveCategory()) {
        case VOID:
            convertedType = dtFactory.createSqlType(SqlTypeName.NULL);
            break;
        case BOOLEAN:
            convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN);
            break;
        case BYTE:
            convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT);
            break;
        case SHORT:
            convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT);
            break;
        case INT:
            convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER);
            break;
        case LONG:
            convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT);
            break;
        case FLOAT:
            convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT);
            break;
        case DOUBLE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE);
            break;
        case STRING:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case DATE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DATE);
            break;
        case TIMESTAMP:
            convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP);
            break;
        case TIMESTAMPLOCALTZ:
            convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE);
            break;
        case INTERVAL_YEAR_MONTH:
            convertedType = dtFactory.createSqlIntervalType(new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
            break;
        case INTERVAL_DAY_TIME:
            convertedType = dtFactory.createSqlIntervalType(new SqlIntervalQualifier(TimeUnit.DAY, TimeUnit.SECOND, new SqlParserPos(1, 1)));
            break;
        case BINARY:
            convertedType = dtFactory.createSqlType(SqlTypeName.BINARY);
            break;
        case DECIMAL:
            DecimalTypeInfo dtInf = (DecimalTypeInfo) type;
            convertedType = dtFactory.createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale());
            break;
        case VARCHAR:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.VARCHAR, ((BaseCharTypeInfo) type).getLength()), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case CHAR:
            convertedType = dtFactory.createTypeWithCharsetAndCollation(dtFactory.createSqlType(SqlTypeName.CHAR, ((BaseCharTypeInfo) type).getLength()), Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT);
            break;
        case UNKNOWN:
            convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
            break;
    }
    if (null == convertedType) {
        throw new RuntimeException("Unsupported Type : " + type.getTypeName());
    }
    return dtFactory.createTypeWithNullability(convertedType, true);
}
Also used: DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo), SqlParserPos (org.apache.calcite.sql.parser.SqlParserPos), SqlIntervalQualifier (org.apache.calcite.sql.SqlIntervalQualifier), RelDataType (org.apache.calcite.rel.type.RelDataType)
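
This converter is nearly identical to the Flink copy in Example 2; beyond the char types, the DECIMAL branch shows the same carry-the-parameters pattern with precision and scale, and every converted type is wrapped as nullable at the end. A standalone sketch of those two steps (the driver class name is illustrative):

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DecimalConversionSketch {
    public static void main(String[] args) {
        RelDataTypeFactory dtFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);

        // decimal(12,2): precision and scale carry over one-to-one,
        // as in the DECIMAL branch of TypeConverter.convert above.
        DecimalTypeInfo dtInf = TypeInfoFactory.getDecimalTypeInfo(12, 2);
        RelDataType decimalType =
                dtFactory.createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale());

        // Hive columns are nullable by default, so the converted type is too.
        RelDataType nullable = dtFactory.createTypeWithNullability(decimalType, true);
        System.out.println(nullable.getFullTypeString()); // prints something like DECIMAL(12, 2)
    }
}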

Example 5 with BaseCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.

From the class GenericUDFLower, the method initialize:

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
        throw new UDFArgumentLengthException("LOWER requires 1 argument, got " + arguments.length);
    }
    if (arguments[0].getCategory() != Category.PRIMITIVE) {
        throw new UDFArgumentException("LOWER only takes primitive types, got " + arguments[0].getTypeName());
    }
    argumentOI = (PrimitiveObjectInspector) arguments[0];
    stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
    PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
    ObjectInspector outputOI = null;
    BaseCharTypeInfo typeInfo;
    switch(inputType) {
        case CHAR:
            // return type should have same length as the input.
            returnType = inputType;
            typeInfo = TypeInfoFactory.getCharTypeInfo(GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
            outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            break;
        case VARCHAR:
            // return type should have same length as the input.
            returnType = inputType;
            typeInfo = TypeInfoFactory.getVarcharTypeInfo(GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
            outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            break;
        default:
            returnType = PrimitiveCategory.STRING;
            outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
            break;
    }
    returnHelper = new GenericUDFUtils.StringHelper(returnType);
    return outputOI;
}
Also used: UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException), PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo), UDFArgumentLengthException (org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException), PrimitiveObjectInspectorConverter (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter), StringConverter (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter), PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)
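
The point of the CHAR and VARCHAR branches is that LOWER must preserve the declared length of its input instead of widening the result to STRING. A minimal sketch of rebuilding a length-parameterized writable object inspector; the driver class is illustrative, and a literal length stands in for GenericUDFUtils.StringHelper.getFixedStringSizeForType.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class LengthPreservingSketch {
    public static void main(String[] args) {
        // Rebuild a char(20) output type from a bare length, as the CHAR
        // branch of initialize does with the input's fixed size.
        BaseCharTypeInfo typeInfo = TypeInfoFactory.getCharTypeInfo(20);

        ObjectInspector outputOI =
                PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
        System.out.println(outputOI.getTypeName()); // prints char(20)
    }
}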

Aggregations

BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo): 7 uses
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 4 uses
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 3 uses
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 3 uses
UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException): 3 uses
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 3 uses
IOException (java.io.IOException): 2 uses
ArrayList (java.util.ArrayList): 2 uses
LinkedHashMap (java.util.LinkedHashMap): 2 uses
RelDataType (org.apache.calcite.rel.type.RelDataType): 2 uses
SqlIntervalQualifier (org.apache.calcite.sql.SqlIntervalQualifier): 2 uses
SqlParserPos (org.apache.calcite.sql.parser.SqlParserPos): 2 uses
UDFArgumentLengthException (org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException): 2 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 2 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 2 uses
PrimitiveObjectInspectorConverter (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter): 2 uses
StringConverter (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter): 2 uses
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 2 uses
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 2 uses