Search in sources :

Example 6 with BinaryObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector in project presto by prestodb.

Source: method writePrimitive of class TestDataWritableWriter.

/**
 * It writes the primitive value to the Parquet RecordConsumer.
 * <p>
 * Null values are skipped entirely (absence is recorded at the field level by
 * the caller), and VOID columns never emit anything. Narrow integral types
 * (BYTE/SHORT) are widened to Parquet's 32-bit integer; textual, binary,
 * decimal and timestamp types are all written as Binary values.
 *
 * @param value The object that contains the primitive value.
 * @param inspector The object inspector used to get the correct value type.
 * @throws IllegalArgumentException if the inspector reports a primitive
 *         category with no case below.
 */
private void writePrimitive(final Object value, final PrimitiveObjectInspector inspector) {
    if (value == null) {
        return;
    }
    switch(inspector.getPrimitiveCategory()) {
        case VOID:
            // Nothing to write for a VOID column.
            return;
        case DOUBLE:
            recordConsumer.addDouble(((DoubleObjectInspector) inspector).get(value));
            break;
        case BOOLEAN:
            recordConsumer.addBoolean(((BooleanObjectInspector) inspector).get(value));
            break;
        case FLOAT:
            recordConsumer.addFloat(((FloatObjectInspector) inspector).get(value));
            break;
        case BYTE:
            // byte is widened to a Parquet int32.
            recordConsumer.addInteger(((ByteObjectInspector) inspector).get(value));
            break;
        case INT:
            recordConsumer.addInteger(((IntObjectInspector) inspector).get(value));
            break;
        case LONG:
            recordConsumer.addLong(((LongObjectInspector) inspector).get(value));
            break;
        case SHORT:
            // short is widened to a Parquet int32.
            recordConsumer.addInteger(((ShortObjectInspector) inspector).get(value));
            break;
        case STRING:
            String v = ((StringObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addBinary(Binary.fromString(v));
            break;
        case CHAR:
            // getStrippedValue() drops the CHAR type's trailing pad spaces
            // before the string is written.
            String vChar = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(value).getStrippedValue();
            recordConsumer.addBinary(Binary.fromString(vChar));
            break;
        case VARCHAR:
            String vVarchar = ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(value).getValue();
            recordConsumer.addBinary(Binary.fromString(vVarchar));
            break;
        case BINARY:
            byte[] vBinary = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addBinary(Binary.fromByteArray(vBinary));
            break;
        case TIMESTAMP:
            // Encoded as a Parquet INT96 nanotime. The boolean flag presumably
            // disables a legacy timezone conversion — TODO confirm against
            // NanoTimeUtils.getNanoTime's contract.
            Timestamp ts = ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addBinary(NanoTimeUtils.getNanoTime(ts, false).toBinary());
            break;
        case DECIMAL:
            // NOTE(review): unlike the other cases this casts the *result* of
            // getPrimitiveJavaObject rather than the inspector; it relies on
            // the inspector returning a HiveDecimal here — verify.
            HiveDecimal vDecimal = ((HiveDecimal) inspector.getPrimitiveJavaObject(value));
            DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) inspector.getTypeInfo();
            recordConsumer.addBinary(decimalToBinary(vDecimal, decTypeInfo));
            break;
        case DATE:
            // Dates are stored as the number of days since the epoch.
            Date vDate = ((DateObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addInteger(DateWritable.dateToDays(vDate));
            break;
        default:
            throw new IllegalArgumentException("Unsupported primitive data type: " + inspector.getPrimitiveCategory());
    }
}
Also used : DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) Timestamp(java.sql.Timestamp) Date(java.sql.Date)

Example 7 with BinaryObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector in project hive by apache.

Source: method testIcebergByteBufferObjectInspector of class TestIcebergBinaryObjectInspector.

// Verifies IcebergBinaryObjectInspector's metadata, null handling, and that
// ByteBuffer inputs — including slices and buffers with a non-zero position —
// are converted to the expected byte[]/BytesWritable views.
@Test
public void testIcebergByteBufferObjectInspector() {
    BinaryObjectInspector oi = IcebergBinaryObjectInspector.get();
    // The inspector must describe itself as a primitive BINARY type.
    Assert.assertEquals(ObjectInspector.Category.PRIMITIVE, oi.getCategory());
    Assert.assertEquals(PrimitiveObjectInspector.PrimitiveCategory.BINARY, oi.getPrimitiveCategory());
    Assert.assertEquals(TypeInfoFactory.binaryTypeInfo, oi.getTypeInfo());
    Assert.assertEquals(TypeInfoFactory.binaryTypeInfo.getTypeName(), oi.getTypeName());
    Assert.assertEquals(byte[].class, oi.getJavaPrimitiveClass());
    Assert.assertEquals(BytesWritable.class, oi.getPrimitiveWritableClass());
    // Null passes through every conversion method untouched.
    Assert.assertNull(oi.copyObject(null));
    Assert.assertNull(oi.getPrimitiveJavaObject(null));
    Assert.assertNull(oi.getPrimitiveWritableObject(null));
    // A whole wrapped buffer yields the full backing array.
    byte[] bytes = new byte[] { 0, 1, 2, 3 };
    ByteBuffer buffer = ByteBuffer.wrap(bytes);
    Assert.assertArrayEquals(bytes, oi.getPrimitiveJavaObject(buffer));
    Assert.assertEquals(new BytesWritable(bytes), oi.getPrimitiveWritableObject(buffer));
    // A slice must only expose its own window {1, 2}, not the backing array.
    ByteBuffer slice = ByteBuffer.wrap(bytes, 1, 2).slice();
    Assert.assertArrayEquals(new byte[] { 1, 2 }, oi.getPrimitiveJavaObject(slice));
    Assert.assertEquals(new BytesWritable(new byte[] { 1, 2 }), oi.getPrimitiveWritableObject(slice));
    // Advancing the slice's position narrows what the inspector reads to {2}.
    slice.position(1);
    Assert.assertArrayEquals(new byte[] { 2 }, oi.getPrimitiveJavaObject(slice));
    Assert.assertEquals(new BytesWritable(new byte[] { 2 }), oi.getPrimitiveWritableObject(slice));
    // copyObject must produce an equal but distinct array (a real copy).
    byte[] copy = (byte[]) oi.copyObject(bytes);
    Assert.assertArrayEquals(bytes, copy);
    Assert.assertNotSame(bytes, copy);
    Assert.assertFalse(oi.preferWritable());
}
Also used : BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 8 with BinaryObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector in project hive by apache.

Source: method serialize of class KuduSerDe.

/**
 * Serialize an object by navigating inside the Object with the ObjectInspector.
 * <p>
 * The object must be a STRUCT whose fields line up positionally with the Kudu
 * schema's columns; each non-null field is extracted with its own inspector
 * and written into a fresh {@link PartialRow} with the matching typed setter.
 * Null fields are written with {@code setNull}.
 *
 * @param obj the row object to serialize
 * @param objectInspector a STRUCT inspector describing {@code obj}
 * @return a {@code KuduWritable} wrapping the populated row
 * @throws SerDeException if a column has a Kudu type with no case below
 */
@Override
public KuduWritable serialize(Object obj, ObjectInspector objectInspector) throws SerDeException {
    Preconditions.checkArgument(objectInspector.getCategory() == Category.STRUCT);
    StructObjectInspector soi = (StructObjectInspector) objectInspector;
    List<Object> writableObj = soi.getStructFieldsDataAsList(obj);
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    PartialRow row = schema.newPartialRow();
    // Iterate by schema position: column i is paired with struct field i.
    for (int i = 0; i < schema.getColumnCount(); i++) {
        StructField field = fields.get(i);
        Object value = writableObj.get(i);
        if (value == null) {
            row.setNull(i);
        } else {
            // Dispatch on the Kudu column type; the field's own inspector
            // extracts the primitive value from the Hive object.
            Type type = schema.getColumnByIndex(i).getType();
            ObjectInspector inspector = field.getFieldObjectInspector();
            switch(type) {
                case BOOL:
                    boolean boolVal = ((BooleanObjectInspector) inspector).get(value);
                    row.addBoolean(i, boolVal);
                    break;
                case INT8:
                    byte byteVal = ((ByteObjectInspector) inspector).get(value);
                    row.addByte(i, byteVal);
                    break;
                case INT16:
                    short shortVal = ((ShortObjectInspector) inspector).get(value);
                    row.addShort(i, shortVal);
                    break;
                case INT32:
                    int intVal = ((IntObjectInspector) inspector).get(value);
                    row.addInt(i, intVal);
                    break;
                case INT64:
                    long longVal = ((LongObjectInspector) inspector).get(value);
                    row.addLong(i, longVal);
                    break;
                case UNIXTIME_MICROS:
                    // Calling toSqlTimestamp and using the addTimestamp API ensures we properly
                    // convert Hive localDateTime to UTC.
                    java.sql.Timestamp timestampVal = ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(value).toSqlTimestamp();
                    row.addTimestamp(i, timestampVal);
                    break;
                case DECIMAL:
                    HiveDecimal decimalVal = ((HiveDecimalObjectInspector) inspector).getPrimitiveJavaObject(value);
                    row.addDecimal(i, decimalVal.bigDecimalValue());
                    break;
                case FLOAT:
                    float floatVal = ((FloatObjectInspector) inspector).get(value);
                    row.addFloat(i, floatVal);
                    break;
                case DOUBLE:
                    double doubleVal = ((DoubleObjectInspector) inspector).get(value);
                    row.addDouble(i, doubleVal);
                    break;
                case STRING:
                    String stringVal = ((StringObjectInspector) inspector).getPrimitiveJavaObject(value);
                    row.addString(i, stringVal);
                    break;
                case BINARY:
                    byte[] bytesVal = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(value);
                    row.addBinary(i, bytesVal);
                    break;
                default:
                    throw new SerDeException("Unsupported column type: " + type.name());
            }
        }
    }
    return new KuduWritable(row);
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) PartialRow(org.apache.kudu.client.PartialRow) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) 
BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) Type(org.apache.kudu.Type) KuduHiveUtils.toHiveType(org.apache.hadoop.hive.kudu.KuduHiveUtils.toHiveType) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 9 with BinaryObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector in project hive by apache.

Source: method initialize of class GenericUDFNDVComputeBitVector.

/**
 * Validates that the single argument is a BINARY primitive, remembers its
 * inspector in {@code inputOI}, and fixes this UDF's return type to a
 * writable long.
 *
 * @param arguments the argument inspectors; only index 0 is examined
 * @return the writable-long object inspector describing the UDF's result
 * @throws UDFArgumentException if the argument is not a BINARY primitive
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    ObjectInspector argument = arguments[0];
    if (argument.getCategory() != Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(0, "ndv_compute_bitvector input only takes primitive types, got " + argument.getTypeName());
    }
    PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) argument;
    if (PrimitiveCategory.BINARY != primitiveOI.getPrimitiveCategory()) {
        throw new UDFArgumentTypeException(0, "ndv_compute_bitvector input only takes BINARY type, got " + argument.getTypeName());
    }
    inputOI = (BinaryObjectInspector) argument;
    return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
}
Also used : UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)

Example 10 with BinaryObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector in project hive by apache.

Source: method evaluate of class GenericUDFInBloomFilter.

// Tests whether the first argument's value is (probably) contained in the
// bloom filter serialized in the second argument. The filter is deserialized
// lazily on first call and cached in the instance; afterwards each value is
// hashed according to its primitive category. Returns Boolean or null when
// either argument is null.
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    // Return if either of the arguments is null
    if (arguments[0].get() == null || arguments[1].get() == null) {
        return null;
    }
    if (!initializedBloomFilter) {
        // Setup the bloom filter once
        InputStream in = null;
        try {
            BytesWritable bw = (BytesWritable) arguments[1].get();
            // Copy out exactly getLength() bytes: the writable's backing
            // array may be larger than its logical content.
            byte[] bytes = new byte[bw.getLength()];
            System.arraycopy(bw.getBytes(), 0, bytes, 0, bw.getLength());
            in = new NonSyncByteArrayInputStream(bytes);
            bloomFilter = BloomKFilter.deserialize(in);
        } catch (IOException e) {
            throw new HiveException(e);
        } finally {
            IOUtils.closeStream(in);
        }
        initializedBloomFilter = true;
    }
    // Check if the value is in bloom filter
    switch(((PrimitiveObjectInspector) valObjectInspector).getTypeInfo().getPrimitiveCategory()) {
        case BOOLEAN:
            // Booleans are hashed as the longs 1/0.
            boolean vBoolean = ((BooleanObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vBoolean ? 1 : 0);
        case BYTE:
            byte vByte = ((ByteObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vByte);
        case SHORT:
            short vShort = ((ShortObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vShort);
        case INT:
            int vInt = ((IntObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vInt);
        case LONG:
            long vLong = ((LongObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vLong);
        case FLOAT:
            // Floating-point values are hashed through testDouble.
            float vFloat = ((FloatObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testDouble(vFloat);
        case DOUBLE:
            double vDouble = ((DoubleObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testDouble(vDouble);
        case DECIMAL:
            // toBytes fills scratchBuffer from the end and returns the start
            // offset, so the reusable buffer avoids a per-row allocation.
            HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            int startIdx = vDecimal.toBytes(scratchBuffer);
            return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx);
        case DATE:
            // Dates are hashed by their day count.
            DateWritableV2 vDate = ((DateObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testLong(vDate.getDays());
        case TIMESTAMP:
            // Timestamps are hashed at millisecond resolution.
            Timestamp vTimeStamp = ((TimestampObjectInspector) valObjectInspector).getPrimitiveJavaObject(arguments[0].get());
            return bloomFilter.testLong(vTimeStamp.toEpochMilli());
        case CHAR:
            // getStrippedValue() drops the CHAR type's trailing pad spaces.
            Text vChar = ((HiveCharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getStrippedValue();
            return bloomFilter.testBytes(vChar.getBytes(), 0, vChar.getLength());
        case VARCHAR:
            Text vVarchar = ((HiveVarcharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getTextValue();
            return bloomFilter.testBytes(vVarchar.getBytes(), 0, vVarchar.getLength());
        case STRING:
            Text vString = ((StringObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testBytes(vString.getBytes(), 0, vString.getLength());
        case BINARY:
            BytesWritable vBytes = ((BinaryObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testBytes(vBytes.getBytes(), 0, vBytes.getLength());
        default:
            throw new UDFArgumentTypeException(0, "Bad primitive category " + ((PrimitiveTypeInfo) valObjectInspector).getPrimitiveCategory());
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) NonSyncByteArrayInputStream(org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream) InputStream(java.io.InputStream) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) BytesWritable(org.apache.hadoop.io.BytesWritable) NonSyncByteArrayInputStream(org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException)

Aggregations

BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)33 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)26 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)23 DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector)19 HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector)17 TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector)17 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)16 ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector)15 FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)15 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)15 ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector)15 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)14 DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)14 BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)13 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)12 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)12 HiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector)12 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)11 HiveVarcharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector)11 BytesWritable (org.apache.hadoop.io.BytesWritable)11