Example 1 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class TestDynamicSerDe, the method testDynamicSerDe:

public void testDynamicSerDe() throws Throwable {
    try {
        // Try to construct an object
        ArrayList<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        HashMap<String, Integer> another = new HashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        struct.add(Integer.valueOf(-234));
        struct.add(Double.valueOf(1.0));
        struct.add(Double.valueOf(-2.5));
        // All protocols
        ArrayList<String> protocols = new ArrayList<String>();
        ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
        ArrayList<HashMap<String, String>> additionalParams = new ArrayList<HashMap<String, String>>();
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(makeHashMap("serialization.sort.order", "++++++"));
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(makeHashMap("serialization.sort.order", "------"));
        protocols.add(org.apache.thrift.protocol.TBinaryProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(null);
        protocols.add(org.apache.thrift.protocol.TJSONProtocol.class.getName());
        isBinaries.add(false);
        additionalParams.add(null);
        // TSimpleJSONProtocol does not support deserialization.
        // protocols.add(org.apache.thrift.protocol.TSimpleJSONProtocol.class.getName());
        // isBinaries.add(false);
        // additionalParams.add(null);
        // TCTLSeparatedProtocol is not done yet.
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        isBinaries.add(false);
        additionalParams.add(null);
        System.out.println("input struct = " + struct);
        for (int pp = 0; pp < protocols.size(); pp++) {
            String protocol = protocols.get(pp);
            boolean isBinary = isBinaries.get(pp);
            System.out.println("Testing protocol: " + protocol);
            Properties schema = new Properties();
            schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, protocol);
            schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
            schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 _hello, list<string> 2bye, map<string,i32> another, i32 nhello, double d, double nd}");
            schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
            HashMap<String, String> p = additionalParams.get(pp);
            if (p != null) {
                for (Entry<String, String> e : p.entrySet()) {
                    schema.setProperty(e.getKey(), e.getValue());
                }
            }
            DynamicSerDe serde = new DynamicSerDe();
            serde.initialize(new Configuration(), schema);
            // Try getObjectInspector
            ObjectInspector oi = serde.getObjectInspector();
            System.out.println("TypeName = " + oi.getTypeName());
            // Try to serialize
            BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
            System.out.println("bytes =" + hexString(bytes));
            if (!isBinary) {
                // get()/getSize() are deprecated aliases of getBytes()/getLength().
                System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()));
            }
            // Try to deserialize
            Object o = serde.deserialize(bytes);
            System.out.println("o class = " + o.getClass());
            List<?> olist = (List<?>) o;
            System.out.println("o size = " + olist.size());
            System.out.println("o[0] class = " + olist.get(0).getClass());
            System.out.println("o[1] class = " + olist.get(1).getClass());
            System.out.println("o[2] class = " + olist.get(2).getClass());
            System.out.println("o = " + o);
            assertEquals(struct, o);
        }
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used:
Configuration (org.apache.hadoop.conf.Configuration)
HashMap (java.util.HashMap)
LinkedHashMap (java.util.LinkedHashMap)
ArrayList (java.util.ArrayList)
List (java.util.List)
Properties (java.util.Properties)
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)
BytesWritable (org.apache.hadoop.io.BytesWritable)
TCTLSeparatedProtocol (org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)
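
The test reads the raw buffer with the deprecated get()/getSize() pair. Below is a minimal standalone sketch of the same BytesWritable handling using the current getBytes()/getLength() accessors plus copyBytes(); the class name and sample value are illustrative, not from the test.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSketch {
    public static void main(String[] args) {
        BytesWritable bw = new BytesWritable("hello".getBytes(StandardCharsets.UTF_8));
        // getBytes() returns the backing array, which may be longer than the
        // valid data, so reads must be bounded by getLength().
        String text = new String(bw.getBytes(), 0, bw.getLength(), StandardCharsets.UTF_8);
        // copyBytes() allocates a fresh array trimmed to exactly getLength() bytes.
        byte[] trimmed = bw.copyBytes();
        System.out.println(text + " / " + trimmed.length); // hello / 5
    }
}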

Example 2 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class MultiDelimitSerDe, the method serializeNoEncode:

// This is basically the same as LazySimpleSerDe.serialize, except that we don't
// Base64-encode binary data, because we're using a printable string as delimiter.
// Consider the row "strAQ==\1": "str" is a string field, "AQ==" is the delimiter,
// and \1 is the binary data. Base64-encoding the byte \1 would yield "AQ==",
// which collides with the delimiter.
private static void serializeNoEncode(ByteStream.Output out, Object obj, ObjectInspector objInspector, byte[] separators, int level, Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException, SerDeException {
    if (obj == null) {
        out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        return;
    }
    char separator;
    List<?> list;
    switch(objInspector.getCategory()) {
        case PRIMITIVE:
            PrimitiveObjectInspector oi = (PrimitiveObjectInspector) objInspector;
            if (oi.getPrimitiveCategory() == PrimitiveCategory.BINARY) {
                BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(obj);
                // Copy only the valid region: getBytes() exposes the backing array,
                // which may be longer than getLength().
                byte[] toWrite = new byte[bw.getLength()];
                System.arraycopy(bw.getBytes(), 0, toWrite, 0, bw.getLength());
                out.write(toWrite, 0, toWrite.length);
            } else {
                LazyUtils.writePrimitiveUTF8(out, obj, oi, escaped, escapeChar, needsEscape);
            }
            return;
        case LIST:
            separator = (char) separators[level];
            ListObjectInspector loi = (ListObjectInspector) objInspector;
            list = loi.getList(obj);
            ObjectInspector eoi = loi.getListElementObjectInspector();
            if (list == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        out.write(separator);
                    }
                    serializeNoEncode(out, list.get(i), eoi, separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
        case MAP:
            separator = (char) separators[level];
            char keyValueSeparator = (char) separators[level + 1];
            MapObjectInspector moi = (MapObjectInspector) objInspector;
            ObjectInspector koi = moi.getMapKeyObjectInspector();
            ObjectInspector voi = moi.getMapValueObjectInspector();
            Map<?, ?> map = moi.getMap(obj);
            if (map == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                boolean first = true;
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    if (first) {
                        first = false;
                    } else {
                        out.write(separator);
                    }
                    serializeNoEncode(out, entry.getKey(), koi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
                    out.write(keyValueSeparator);
                    serializeNoEncode(out, entry.getValue(), voi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
        case STRUCT:
            separator = (char) separators[level];
            StructObjectInspector soi = (StructObjectInspector) objInspector;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();
            list = soi.getStructFieldsDataAsList(obj);
            if (list == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        out.write(separator);
                    }
                    serializeNoEncode(out, list.get(i), fields.get(i).getFieldObjectInspector(), separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
    }
    throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
}
Also used:
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
BytesWritable (org.apache.hadoop.io.BytesWritable)
Map (java.util.Map)
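
The recursion above assigns one separator per nesting level: list and struct elements descend one level, while map entries consume two (the entry separator and the key/value separator). A self-contained sketch of that scheme over plain Java collections follows; it is illustrative only, not the Hive API, and the separator bytes are arbitrary.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class LevelSeparators {
    // One separator character per nesting depth (values are illustrative).
    private static final char[] SEPARATORS = {'\u0001', '\u0002', '\u0003', '\u0004'};

    static String serialize(Object obj, int level) {
        if (obj instanceof List) {
            StringBuilder sb = new StringBuilder();
            List<?> list = (List<?>) obj;
            for (int i = 0; i < list.size(); i++) {
                if (i > 0) {
                    sb.append(SEPARATORS[level]); // element separator
                }
                sb.append(serialize(list.get(i), level + 1));
            }
            return sb.toString();
        } else if (obj instanceof Map) {
            StringBuilder sb = new StringBuilder();
            boolean first = true;
            for (Map.Entry<?, ?> e : ((Map<?, ?>) obj).entrySet()) {
                if (!first) {
                    sb.append(SEPARATORS[level]); // entry separator
                }
                first = false;
                sb.append(serialize(e.getKey(), level + 2));
                sb.append(SEPARATORS[level + 1]); // key/value separator
                sb.append(serialize(e.getValue(), level + 2));
            }
            return sb.toString();
        }
        return String.valueOf(obj); // primitives need no separator
    }

    public static void main(String[] args) {
        Map<String, Integer> m = new LinkedHashMap<>();
        m.put("a", 1);
        m.put("b", 2);
        String s = serialize(Arrays.asList("x", m), 0);
        // Make the control characters visible: prints x|a#1^b#2
        System.out.println(s.replace('\u0001', '|').replace('\u0002', '^').replace('\u0003', '#'));
    }
}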

Example 3 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class VectorUDFAdaptor, the method setOutputCol:

private void setOutputCol(ColumnVector colVec, int i, Object value) {
    /* Depending on the output type, get the value, cast the result to the
     * correct type if needed, and assign the result into the output vector.
     */
    if (outputOI instanceof WritableStringObjectInspector) {
        BytesColumnVector bv = (BytesColumnVector) colVec;
        Text t;
        if (value instanceof String) {
            t = new Text((String) value);
        } else {
            t = ((WritableStringObjectInspector) outputOI).getPrimitiveWritableObject(value);
        }
        bv.setVal(i, t.getBytes(), 0, t.getLength());
    } else if (outputOI instanceof WritableHiveCharObjectInspector) {
        WritableHiveCharObjectInspector writableHiveCharObjectOI = (WritableHiveCharObjectInspector) outputOI;
        int maxLength = ((CharTypeInfo) writableHiveCharObjectOI.getTypeInfo()).getLength();
        BytesColumnVector bv = (BytesColumnVector) colVec;
        HiveCharWritable hiveCharWritable;
        if (value instanceof HiveCharWritable) {
            hiveCharWritable = ((HiveCharWritable) value);
        } else {
            hiveCharWritable = writableHiveCharObjectOI.getPrimitiveWritableObject(value);
        }
        Text t = hiveCharWritable.getTextValue();
        // In vector mode, CHAR values are stored unpadded, so right-trim and truncate.
        StringExpr.rightTrimAndTruncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength);
    } else if (outputOI instanceof WritableHiveVarcharObjectInspector) {
        WritableHiveVarcharObjectInspector writableHiveVarcharObjectOI = (WritableHiveVarcharObjectInspector) outputOI;
        int maxLength = ((VarcharTypeInfo) writableHiveVarcharObjectOI.getTypeInfo()).getLength();
        BytesColumnVector bv = (BytesColumnVector) colVec;
        HiveVarcharWritable hiveVarcharWritable;
        if (value instanceof HiveVarcharWritable) {
            hiveVarcharWritable = ((HiveVarcharWritable) value);
        } else {
            hiveVarcharWritable = writableHiveVarcharObjectOI.getPrimitiveWritableObject(value);
        }
        Text t = hiveVarcharWritable.getTextValue();
        StringExpr.truncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength);
    } else if (outputOI instanceof WritableIntObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Integer) {
            lv.vector[i] = (Integer) value;
        } else {
            lv.vector[i] = ((WritableIntObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableLongObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Long) {
            lv.vector[i] = (Long) value;
        } else {
            lv.vector[i] = ((WritableLongObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableDoubleObjectInspector) {
        DoubleColumnVector dv = (DoubleColumnVector) colVec;
        if (value instanceof Double) {
            dv.vector[i] = (Double) value;
        } else {
            dv.vector[i] = ((WritableDoubleObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableFloatObjectInspector) {
        DoubleColumnVector dv = (DoubleColumnVector) colVec;
        if (value instanceof Float) {
            dv.vector[i] = (Float) value;
        } else {
            dv.vector[i] = ((WritableFloatObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableShortObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Short) {
            lv.vector[i] = (Short) value;
        } else {
            lv.vector[i] = ((WritableShortObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableByteObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Byte) {
            lv.vector[i] = (Byte) value;
        } else {
            lv.vector[i] = ((WritableByteObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableTimestampObjectInspector) {
        TimestampColumnVector tv = (TimestampColumnVector) colVec;
        Timestamp ts;
        if (value instanceof Timestamp) {
            ts = (Timestamp) value;
        } else {
            ts = ((WritableTimestampObjectInspector) outputOI).getPrimitiveJavaObject(value);
        }
        tv.set(i, ts);
    } else if (outputOI instanceof WritableDateObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        Date ts;
        if (value instanceof Date) {
            ts = (Date) value;
        } else {
            ts = ((WritableDateObjectInspector) outputOI).getPrimitiveJavaObject(value);
        }
        long l = DateWritable.dateToDays(ts);
        lv.vector[i] = l;
    } else if (outputOI instanceof WritableBooleanObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Boolean) {
            lv.vector[i] = (Boolean) value ? 1 : 0;
        } else {
            lv.vector[i] = ((WritableBooleanObjectInspector) outputOI).get(value) ? 1 : 0;
        }
    } else if (outputOI instanceof WritableHiveDecimalObjectInspector) {
        DecimalColumnVector dcv = (DecimalColumnVector) colVec;
        if (value instanceof HiveDecimal) {
            dcv.set(i, (HiveDecimal) value);
        } else {
            HiveDecimal hd = ((WritableHiveDecimalObjectInspector) outputOI).getPrimitiveJavaObject(value);
            dcv.set(i, hd);
        }
    } else if (outputOI instanceof WritableBinaryObjectInspector) {
        BytesWritable bw = (BytesWritable) value;
        BytesColumnVector bv = (BytesColumnVector) colVec;
        bv.setVal(i, bw.getBytes(), 0, bw.getLength());
    } else {
        throw new RuntimeException("Unhandled object type " + outputOI.getTypeName() + " inspector class " + outputOI.getClass().getName() + " value class " + value.getClass().getName());
    }
}
Also used:
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)
Timestamp (java.sql.Timestamp)
Date (java.sql.Date)
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)
HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable)
HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable)
Text (org.apache.hadoop.io.Text)
BytesWritable (org.apache.hadoop.io.BytesWritable)
WritableBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector)
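
Each branch above targets the ColumnVector subclass that backs the Hive type: BytesColumnVector for string, char, varchar, and binary; LongColumnVector for integer-like, boolean, and date types; DoubleColumnVector for float and double; and so on. A minimal sketch of the binary branch in isolation follows; the vector size and payload are illustrative.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.io.BytesWritable;

public class VectorWriteSketch {
    public static void main(String[] args) {
        BytesColumnVector bv = new BytesColumnVector(1024);
        bv.initBuffer(); // allocate the shared byte buffer before setVal()
        LongColumnVector lv = new LongColumnVector(1024);

        BytesWritable bw = new BytesWritable(new byte[] {1, 2, 3});
        // Copy only the valid region of the writable into row 0.
        bv.setVal(0, bw.getBytes(), 0, bw.getLength());
        // Long-backed types are written straight into the public vector array.
        lv.vector[0] = 42L;
    }
}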

Example 4 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class HBaseUtils, the method desierliazeDbNameTableNameFromPartitionKey (sic):

private static List<String> desierliazeDbNameTableNameFromPartitionKey(byte[] key, Configuration conf) {
    StringBuffer names = new StringBuffer();
    names.append("dbName,tableName,");
    StringBuffer types = new StringBuffer();
    types.append("string,string,");
    BinarySortableSerDe serDe = new BinarySortableSerDe();
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
    try {
        serDe.initialize(conf, props);
        List deserializedkeys = ((List) serDe.deserialize(new BytesWritable(key))).subList(0, 2);
        List<String> keys = new ArrayList<>();
        for (int i = 0; i < deserializedkeys.size(); i++) {
            Object deserializedKey = deserializedkeys.get(i);
            if (deserializedKey == null) {
                throw new RuntimeException("Can't have a null dbname or tablename");
            } else {
                TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString("string");
                ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
                Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
                keys.add((String) converter.convert(deserializedKey));
            }
        }
        return keys;
    } catch (SerDeException e) {
        throw new RuntimeException("Error when deserialize key", e);
    }
}
Also used:
BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe)
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)
Converter (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
BytesWritable (org.apache.hadoop.io.BytesWritable)
ByteString (com.google.protobuf.ByteString)
ArrayList (java.util.ArrayList)
List (java.util.List)
Properties (java.util.Properties)
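
A hedged round-trip sketch of the same BinarySortableSerDe usage: serialize a two-string row into a BytesWritable key, then deserialize it back, mirroring how the partition key above was produced and consumed. Column names and values are illustrative; deserialized strings typically come back as Text writables, which is why the method above runs each value through a converter.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BytesWritable;

public class BinarySortableRoundTrip {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty(serdeConstants.LIST_COLUMNS, "dbName,tableName");
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");

        BinarySortableSerDe serDe = new BinarySortableSerDe();
        serDe.initialize(new Configuration(), props);

        // Struct inspector describing a row of two Java strings.
        ObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("dbName", "tableName"),
            Arrays.asList(
                (ObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        BytesWritable key = (BytesWritable) serDe.serialize(Arrays.asList("default", "t1"), rowOI);

        // Deserialization yields the row as a List of column values.
        List<?> row = (List<?>) serDe.deserialize(key);
        System.out.println(row);
    }
}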

Example 5 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class PartitionKeyComparator, the method compareTo:

@Override
public int compareTo(byte[] value, int offset, int length) {
    byte[] bytes = Arrays.copyOfRange(value, offset, offset + length);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Get key " + new String(bytes));
    }
    BinarySortableSerDe serDe = new BinarySortableSerDe();
    List deserializedkeys = null;
    try {
        serDe.initialize(new Configuration(), serdeProps);
        deserializedkeys = ((List) serDe.deserialize(new BytesWritable(bytes))).subList(2, 2 + names.split(",").length);
    } catch (SerDeException e) {
        // don't bother with failed deserialization, continue with next key
        return 1;
    }
    for (int i = 0; i < ranges.size(); i++) {
        Range range = ranges.get(i);
        NativeRange nativeRange = nativeRanges.get(i);
        Comparable partVal = (Comparable) deserializedkeys.get(nativeRange.pos);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Try to match range " + partVal + ", start " + nativeRange.start + ", end " + nativeRange.end);
        }
        if (range.start == null || range.start.inclusive && partVal.compareTo(nativeRange.start) >= 0 || !range.start.inclusive && partVal.compareTo(nativeRange.start) > 0) {
            if (range.end == null || range.end.inclusive && partVal.compareTo(nativeRange.end) <= 0 || !range.end.inclusive && partVal.compareTo(nativeRange.end) < 0) {
                continue;
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Fail to match range " + range.keyName + "-" + partVal + "[" + nativeRange.start + "," + nativeRange.end + "]");
        }
        return 1;
    }
    for (int i = 0; i < ops.size(); i++) {
        Operator op = ops.get(i);
        NativeOperator nativeOp = nativeOps.get(i);
        switch(op.type) {
            case LIKE:
                if (!deserializedkeys.get(nativeOp.pos).toString().matches(op.val)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Fail to match operator " + op.keyName + "(" + deserializedkeys.get(nativeOp.pos) + ") LIKE " + nativeOp.val);
                    }
                    return 1;
                }
                break;
            case NOTEQUALS:
                if (nativeOp.val.equals(deserializedkeys.get(nativeOp.pos))) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Fail to match operator " + op.keyName + "(" + deserializedkeys.get(nativeOp.pos) + ")!=" + nativeOp.val);
                    }
                    return 1;
                }
                break;
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("All conditions satisfied:" + deserializedkeys);
    }
    return 0;
}
Also used:
ByteArrayComparable (org.apache.hadoop.hbase.filter.ByteArrayComparable)
BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe)
Configuration (org.apache.hadoop.conf.Configuration)
BytesWritable (org.apache.hadoop.io.BytesWritable)
ArrayList (java.util.ArrayList)
List (java.util.List)
SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
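
The nested bounds test on range.start and range.end above reads more easily as a single predicate: a key value matches when it is after the (possibly open) start bound and before the (possibly open) end bound. An equivalent standalone helper, illustrative rather than Hive code:

public class RangeCheck {
    // Null bound means unbounded on that side; inclusive flags pick >= / <= over > / <.
    static <T extends Comparable<T>> boolean inRange(
            T val, T start, boolean startInclusive, T end, boolean endInclusive) {
        boolean afterStart = start == null
            || (startInclusive ? val.compareTo(start) >= 0 : val.compareTo(start) > 0);
        boolean beforeEnd = end == null
            || (endInclusive ? val.compareTo(end) <= 0 : val.compareTo(end) < 0);
        return afterStart && beforeEnd;
    }

    public static void main(String[] args) {
        System.out.println(inRange(5, 1, true, 10, false));  // true
        System.out.println(inRange(10, 1, true, 10, false)); // false: exclusive end
    }
}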

Aggregations

BytesWritable (org.apache.hadoop.io.BytesWritable): 339
Test (org.junit.Test): 92
Text (org.apache.hadoop.io.Text): 81
LongWritable (org.apache.hadoop.io.LongWritable): 66
IntWritable (org.apache.hadoop.io.IntWritable): 54
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 51
ArrayList (java.util.ArrayList): 48
List (java.util.List): 48
Path (org.apache.hadoop.fs.Path): 47
IOException (java.io.IOException): 42
Configuration (org.apache.hadoop.conf.Configuration): 41
FloatWritable (org.apache.hadoop.io.FloatWritable): 37
Writable (org.apache.hadoop.io.Writable): 36
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 35
FileSystem (org.apache.hadoop.fs.FileSystem): 28
SequenceFile (org.apache.hadoop.io.SequenceFile): 27
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 26
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 26
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 25
Random (java.util.Random): 24