Search in sources :

Example 1 with NonSyncByteArrayInputStream

use of org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream in project hive by apache.

the class VectorInBloomFilterColDynamicValue method initValue.

private void initValue() {
    InputStream in = null;
    try {
        Object val = bloomFilterDynamicValue.getValue();
        if (val != null) {
            BinaryObjectInspector boi = (BinaryObjectInspector) bloomFilterDynamicValue.getObjectInspector();
            byte[] bytes = boi.getPrimitiveJavaObject(val);
            in = new NonSyncByteArrayInputStream(bytes);
            bloomFilter = BloomKFilter.deserialize(in);
        } else {
            bloomFilter = null;
        }
        initialized = true;
    } catch (Exception err) {
        throw new RuntimeException(err);
    } finally {
        IOUtils.closeStream(in);
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) NonSyncByteArrayInputStream(org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) NonSyncByteArrayInputStream(org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Example 2 with NonSyncByteArrayInputStream

use of org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream in project hive by apache.

the class GenericUDFInBloomFilter method evaluate.

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    // Return if either of the arguments is null
    if (arguments[0].get() == null || arguments[1].get() == null) {
        return null;
    }
    if (!initializedBloomFilter) {
        // Setup the bloom filter once
        InputStream in = null;
        try {
            BytesWritable bw = (BytesWritable) arguments[1].get();
            byte[] bytes = new byte[bw.getLength()];
            System.arraycopy(bw.getBytes(), 0, bytes, 0, bw.getLength());
            in = new NonSyncByteArrayInputStream(bytes);
            bloomFilter = BloomKFilter.deserialize(in);
        } catch (IOException e) {
            throw new HiveException(e);
        } finally {
            IOUtils.closeStream(in);
        }
        initializedBloomFilter = true;
    }
    // Check if the value is in bloom filter
    switch(((PrimitiveObjectInspector) valObjectInspector).getTypeInfo().getPrimitiveCategory()) {
        case BOOLEAN:
            boolean vBoolean = ((BooleanObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vBoolean ? 1 : 0);
        case BYTE:
            byte vByte = ((ByteObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vByte);
        case SHORT:
            short vShort = ((ShortObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vShort);
        case INT:
            int vInt = ((IntObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vInt);
        case LONG:
            long vLong = ((LongObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vLong);
        case FLOAT:
            float vFloat = ((FloatObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testDouble(vFloat);
        case DOUBLE:
            double vDouble = ((DoubleObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testDouble(vDouble);
        case DECIMAL:
            HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            int startIdx = vDecimal.toBytes(scratchBuffer);
            return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx);
        case DATE:
            DateWritable vDate = ((DateObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testLong(vDate.getDays());
        case TIMESTAMP:
            Timestamp vTimeStamp = ((TimestampObjectInspector) valObjectInspector).getPrimitiveJavaObject(arguments[0].get());
            return bloomFilter.testLong(vTimeStamp.getTime());
        case CHAR:
            Text vChar = ((HiveCharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getStrippedValue();
            return bloomFilter.testBytes(vChar.getBytes(), 0, vChar.getLength());
        case VARCHAR:
            Text vVarchar = ((HiveVarcharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getTextValue();
            return bloomFilter.testBytes(vVarchar.getBytes(), 0, vVarchar.getLength());
        case STRING:
            Text vString = ((StringObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testBytes(vString.getBytes(), 0, vString.getLength());
        case BINARY:
            BytesWritable vBytes = ((BinaryObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testBytes(vBytes.getBytes(), 0, vBytes.getLength());
        default:
            throw new UDFArgumentTypeException(0, "Bad primitive category " + ((PrimitiveTypeInfo) valObjectInspector).getPrimitiveCategory());
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ByteArrayInputStream(java.io.ByteArrayInputStream) NonSyncByteArrayInputStream(org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream) InputStream(java.io.InputStream) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) BytesWritable(org.apache.hadoop.io.BytesWritable) NonSyncByteArrayInputStream(org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException)

Aggregations

ByteArrayInputStream (java.io.ByteArrayInputStream)2 InputStream (java.io.InputStream)2 NonSyncByteArrayInputStream (org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 IOException (java.io.IOException)1 Timestamp (java.sql.Timestamp)1 UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException)1 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)1 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)1 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)1 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)1 BytesWritable (org.apache.hadoop.io.BytesWritable)1 Text (org.apache.hadoop.io.Text)1