Example 66 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class GenericUDFInBloomFilter method evaluate.

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    // Return if either of the arguments is null
    if (arguments[0].get() == null || arguments[1].get() == null) {
        return null;
    }
    if (!initializedBloomFilter) {
        // Setup the bloom filter once
        InputStream in = null;
        try {
            BytesWritable bw = (BytesWritable) arguments[1].get();
            byte[] bytes = new byte[bw.getLength()];
            System.arraycopy(bw.getBytes(), 0, bytes, 0, bw.getLength());
            in = new NonSyncByteArrayInputStream(bytes);
            bloomFilter = BloomKFilter.deserialize(in);
        } catch (IOException e) {
            throw new HiveException(e);
        } finally {
            IOUtils.closeStream(in);
        }
        initializedBloomFilter = true;
    }
    // Check if the value is in bloom filter
    switch(((PrimitiveObjectInspector) valObjectInspector).getTypeInfo().getPrimitiveCategory()) {
        case BOOLEAN:
            boolean vBoolean = ((BooleanObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vBoolean ? 1 : 0);
        case BYTE:
            byte vByte = ((ByteObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vByte);
        case SHORT:
            short vShort = ((ShortObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vShort);
        case INT:
            int vInt = ((IntObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vInt);
        case LONG:
            long vLong = ((LongObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vLong);
        case FLOAT:
            float vFloat = ((FloatObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testDouble(vFloat);
        case DOUBLE:
            double vDouble = ((DoubleObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testDouble(vDouble);
        case DECIMAL:
            HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            int startIdx = vDecimal.toBytes(scratchBuffer);
            return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx);
        case DATE:
            DateWritableV2 vDate = ((DateObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testLong(vDate.getDays());
        case TIMESTAMP:
            Timestamp vTimeStamp = ((TimestampObjectInspector) valObjectInspector).getPrimitiveJavaObject(arguments[0].get());
            return bloomFilter.testLong(vTimeStamp.toEpochMilli());
        case CHAR:
            Text vChar = ((HiveCharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getStrippedValue();
            return bloomFilter.testBytes(vChar.getBytes(), 0, vChar.getLength());
        case VARCHAR:
            Text vVarchar = ((HiveVarcharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getTextValue();
            return bloomFilter.testBytes(vVarchar.getBytes(), 0, vVarchar.getLength());
        case STRING:
            Text vString = ((StringObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testBytes(vString.getBytes(), 0, vString.getLength());
        case BINARY:
            BytesWritable vBytes = ((BinaryObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testBytes(vBytes.getBytes(), 0, vBytes.getLength());
        default:
            throw new UDFArgumentTypeException(0, "Bad primitive category " + ((PrimitiveObjectInspector) valObjectInspector).getTypeInfo().getPrimitiveCategory());
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) NonSyncByteArrayInputStream(org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream) InputStream(java.io.InputStream) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException)
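
To see the deserialize-then-probe pattern in isolation, here is a minimal, self-contained sketch (not part of the Hive source; it assumes the BloomKFilter API from org.apache.hive.common.util in Hive's storage-api module): serialize a BloomKFilter to bytes, deserialize it once, and probe it with testLong(), mirroring the LONG branch of evaluate() above.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hive.common.util.BloomKFilter;

public class BloomKFilterRoundTrip {
    public static void main(String[] args) throws Exception {
        // Build side: create a filter sized for the expected number of entries and add a key.
        BloomKFilter filter = new BloomKFilter(1000);
        filter.addLong(42L);

        // Serialize to a byte array, as the semijoin build side would ship it.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BloomKFilter.serialize(out, filter);

        // Probe side: deserialize once (as evaluate() does on first call) and test values.
        BloomKFilter probe = BloomKFilter.deserialize(new ByteArrayInputStream(out.toByteArray()));
        System.out.println(probe.testLong(42L)); // true
        System.out.println(probe.testLong(7L));  // false, with high probability
    }
}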

Example 67 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class GenericUDFDateAdd method initialize.

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 2) {
        throw new UDFArgumentLengthException("date_add() requires 2 arguments, got " + arguments.length);
    }
    if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " + arguments[0].getTypeName() + " is passed as first argument");
    }
    if (arguments[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but " + arguments[1].getTypeName() + " is passed as second argument");
    }
    inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory();
    ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
    switch(inputType1) {
        case STRING:
        case VARCHAR:
        case CHAR:
            inputType1 = PrimitiveCategory.STRING;
            dateConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[0], PrimitiveObjectInspectorFactory.writableStringObjectInspector);
            break;
        case TIMESTAMP:
            dateConverter = new TimestampConverter((PrimitiveObjectInspector) arguments[0], PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
            break;
        case DATE:
            dateConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[0], PrimitiveObjectInspectorFactory.writableDateObjectInspector);
            break;
        default:
            throw new UDFArgumentException(" DATE_ADD() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got " + inputType1);
    }
    inputType2 = ((PrimitiveObjectInspector) arguments[1]).getPrimitiveCategory();
    switch(inputType2) {
        case BYTE:
            daysConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[1], PrimitiveObjectInspectorFactory.writableByteObjectInspector);
            break;
        case SHORT:
            daysConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[1], PrimitiveObjectInspectorFactory.writableShortObjectInspector);
            break;
        case INT:
            daysConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[1], PrimitiveObjectInspectorFactory.writableIntObjectInspector);
            break;
        default:
            throw new UDFArgumentException(" DATE_ADD() only takes TINYINT/SMALLINT/INT types as second argument, got " + inputType2);
    }
    return outputOI;
}
Also used : UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) TimestampConverter(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter)
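
As a usage illustration, the following hedged sketch drives initialize() and evaluate() directly, the way a unit test might; DeferredJavaObject is the standard GenericUDF test helper, and the expected output assumes date_add's documented semantics (date_add('2009-07-30', 1) = '2009-07-31').

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class DateAddSmokeTest {
    public static void main(String[] args) throws Exception {
        GenericUDFDateAdd udf = new GenericUDFDateAdd();
        // A string date hits the STRING/VARCHAR/CHAR case; an int count hits the INT case.
        ObjectInspector[] ois = {
            PrimitiveObjectInspectorFactory.writableStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableIntObjectInspector
        };
        udf.initialize(ois);
        DeferredObject[] in = {
            new DeferredJavaObject(new Text("2009-07-30")),
            new DeferredJavaObject(new IntWritable(1))
        };
        System.out.println(udf.evaluate(in)); // expected: 2009-07-31 (a DateWritableV2)
        udf.close();
    }
}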

Example 68 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class GenericUDFDatetimeLegacyHybridCalendar method initialize.

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length < 1) {
        throw new UDFArgumentLengthException("The function datetime_legacy_hybrid_calendar requires at least one argument, got " + arguments.length);
    }
    try {
        inputOI = (PrimitiveObjectInspector) arguments[0];
        PrimitiveCategory pc = inputOI.getPrimitiveCategory();
        switch(pc) {
            case DATE:
                formatter = new SimpleDateFormat("yyyy-MM-dd");
                formatter.setLenient(false);
                formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
                converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.writableDateObjectInspector);
                resultOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
                break;
            case TIMESTAMP:
                formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                formatter.setLenient(false);
                formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
                converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
                resultOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
                break;
            default:
                throw new UDFArgumentException("datetime_legacy_hybrid_calendar only allows date or timestamp types");
        }
    } catch (ClassCastException e) {
        throw new UDFArgumentException("The function datetime_legacy_hybrid_calendar takes only primitive types");
    }
    return resultOI;
}
Also used : UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) SimpleDateFormat(java.text.SimpleDateFormat)
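
The key detail above is the formatter setup. This standalone sketch shows the same strict, UTC-pinned SimpleDateFormat configuration and why it matters here: java.text.SimpleDateFormat uses the legacy hybrid Julian/Gregorian calendar, which is exactly the calendar this UDF converts to, and setLenient(false) rejects impossible dates instead of rolling them over.

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.TimeZone;

public class StrictUtcFormatterDemo {
    public static void main(String[] args) throws Exception {
        SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
        fmt.setLenient(false);                         // reject malformed dates
        fmt.setTimeZone(TimeZone.getTimeZone("UTC"));  // independent of the JVM default zone

        // Parses on the hybrid calendar (dates before 1582-10-15 are Julian).
        System.out.println(fmt.parse("1582-10-04").getTime());

        try {
            fmt.parse("2020-02-30");                   // strict mode: no rollover to March
        } catch (ParseException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}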

Example 69 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class GenericUDFCastFormat method initialize.

/**
 * @param arguments
 *  0. const int, value of a HiveParser_IdentifiersParser constant which represents a TOK_[TYPE]
 *  1. expression to convert
 *  2. constant string, format pattern
 *  3. (optional) constant int, output char/varchar length
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 3 && arguments.length != 4) {
        throw new UDFArgumentException("Function cast_format requires 3 or 4 arguments (int, expression, StringLiteral" + "[, var/char length]), got " + arguments.length);
    }
    outputOI = getOutputOI(arguments);
    try {
        inputOI = (PrimitiveObjectInspector) arguments[1];
    } catch (ClassCastException e) {
        throw new UDFArgumentException("Function CAST...as ... FORMAT ...takes only primitive types");
    }
    PrimitiveObjectInspectorUtils.PrimitiveGrouping inputPG = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(inputOI.getPrimitiveCategory());
    PrimitiveObjectInspectorUtils.PrimitiveGrouping outputPG = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(outputOI.getPrimitiveCategory());
    if (inputOI.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMPLOCALTZ) {
        throw new UDFArgumentException("Timestamp with local time zone not yet supported for cast ... format function");
    }
    if (!(inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP && outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP || inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP && outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP || inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP)) {
        throw new UDFArgumentException("Function CAST...as ... FORMAT ... only converts datetime objects to string types" + " and string or void objects to datetime types. Type of object provided: " + outputOI.getPrimitiveCategory() + " in primitive grouping " + inputPG + ", type provided: " + inputOI.getPrimitiveCategory() + " in primitive grouping " + outputPG);
    }
    boolean forParsing = (outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP);
    formatter = new HiveSqlDateTimeFormatter(getConstantStringValue(arguments, 2), forParsing);
    return outputOI;
}
Also used : UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) HiveSqlDateTimeFormatter(org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter) PrimitiveObjectInspectorUtils(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils)
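
For context, this hedged sketch constructs the formatter the same way initialize() does. The constructor signature is taken from the code above; format() and parseTimestamp() are assumptions about the HiveSqlDateTimeFormatter API, shown only to illustrate the two directions selected by the forParsing flag.

import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Timestamp;

public class CastFormatSketch {
    public static void main(String[] args) throws Exception {
        // datetime -> string: forParsing = false, because the output is a STRING_GROUP type.
        HiveSqlDateTimeFormatter printer = new HiveSqlDateTimeFormatter("yyyy-mm-dd hh24:mi:ss", false);
        System.out.println(printer.format(Timestamp.valueOf("2024-01-31 13:00:00")));

        // string -> datetime: forParsing = true, because the output is a DATE_GROUP type.
        HiveSqlDateTimeFormatter parser = new HiveSqlDateTimeFormatter("yyyy-mm-dd", true);
        System.out.println(parser.parseTimestamp("2024-01-31"));
    }
}

Note the SQL:2016 pattern tokens (mm for month, mi for minutes), which differ from java.time patterns.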

Example 70 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class GenericUDFDecode method initialize.

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 2) {
        throw new UDFArgumentLengthException("Decode() requires exactly two arguments");
    }
    if (arguments[0].getCategory() != Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(0, "The first argument to Decode() must be primitive");
    }
    PrimitiveCategory category = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory();
    if (category == PrimitiveCategory.BINARY) {
        bytesOI = (BinaryObjectInspector) arguments[0];
    } else if (category == PrimitiveCategory.VOID) {
        bytesOI = (VoidObjectInspector) arguments[0];
    } else {
        throw new UDFArgumentTypeException(0, "The first argument to Decode() must be binary");
    }
    if (arguments[1].getCategory() != Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(1, "The second argument to Decode() must be primitive");
    }
    charsetOI = (PrimitiveObjectInspector) arguments[1];
    if (PrimitiveGrouping.STRING_GROUP != PrimitiveObjectInspectorUtils.getPrimitiveGrouping(charsetOI.getPrimitiveCategory())) {
        throw new UDFArgumentTypeException(1, "The second argument to Decode() must be from string group");
    }
    // If the character set for decoding is constant, we can optimize that
    if (arguments[1] instanceof ConstantObjectInspector) {
        String charSetName = ((ConstantObjectInspector) arguments[1]).getWritableConstantValue().toString();
        decoder = Charset.forName(charSetName).newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
    }
    return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
}
Also used : UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) VoidObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)
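
The constant-charset optimization caches a strict CharsetDecoder. This minimal sketch (plain JDK, no Hive types) shows the same decoder configuration and why REPORT matters: malformed bytes raise an exception instead of being silently replaced with U+FFFD.

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;

public class StrictDecodeDemo {
    public static void main(String[] args) throws CharacterCodingException {
        CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

        byte[] valid = { (byte) 0xE2, (byte) 0x82, (byte) 0xAC };    // "€" in UTF-8
        System.out.println(decoder.decode(ByteBuffer.wrap(valid)));  // prints €

        byte[] truncated = { (byte) 0xE2, (byte) 0x82 };             // incomplete sequence
        try {
            decoder.decode(ByteBuffer.wrap(truncated));
        } catch (CharacterCodingException e) {
            System.out.println("rejected: " + e);                    // MalformedInputException
        }
    }
}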

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 83 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 75 uses
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 74 uses
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 64 uses
ArrayList (java.util.ArrayList): 54 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 52 uses
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 47 uses
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 46 uses
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 45 uses
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 44 uses
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 43 uses
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 38 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 36 uses
Text (org.apache.hadoop.io.Text): 35 uses
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 34 uses
List (java.util.List): 30 uses
Map (java.util.Map): 30 uses
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 30 uses
UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo): 30 uses
UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException): 27 uses