Search in sources:

Example 96 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From class TestHCatHiveThriftCompatibility, method setUp():

@Before
@Override
public void setUp() throws Exception {
    super.setUp();
    // Only build the test fixture once per JVM; later calls are no-ops.
    if (setUpComplete) {
        return;
    }
    // Thrift-serialize an IntString(1, "one", 1) into an in-memory buffer
    // using the binary protocol; the raw bytes become the sequence-file value.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    TIOStreamTransport transport = new TIOStreamTransport(out);
    TBinaryProtocol protocol = new TBinaryProtocol(transport);
    IntString intString = new IntString(1, "one", 1);
    intString.write(protocol);
    BytesWritable bytesWritable = new BytesWritable(out.toByteArray());
    intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq");
    LOG.info("Creating data file: " + intStringSeq);
    // Write a single (NullWritable, BytesWritable) record. close() in a
    // finally block: the original leaked the writer when append() threw.
    SequenceFile.Writer seqFileWriter = SequenceFile.createWriter(intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, NullWritable.class, BytesWritable.class);
    try {
        seqFileWriter.append(NullWritable.get(), bytesWritable);
    } finally {
        seqFileWriter.close();
    }
    setUpComplete = true;
}
Also used : Path(org.apache.hadoop.fs.Path) IntString(org.apache.hadoop.hive.serde2.thrift.test.IntString) TBinaryProtocol(org.apache.thrift.protocol.TBinaryProtocol) SequenceFile(org.apache.hadoop.io.SequenceFile) TIOStreamTransport(org.apache.thrift.transport.TIOStreamTransport) BytesWritable(org.apache.hadoop.io.BytesWritable) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Before(org.junit.Before)

Example 97 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From class TestDynamicSerDe, method testConfigurableTCTLSeparated():

// Round-trips a struct through DynamicSerDe configured with TCTLSeparatedProtocol
// and custom delimiters, then checks the serialized text and the deserialized value.
// NOTE(review): several string literals below contain raw (invisible) control
// characters that the delimiters map to; they must be preserved byte-for-byte.
public void testConfigurableTCTLSeparated() throws Throwable {
    try {
        // Try to construct an object
        ArrayList<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        LinkedHashMap<String, Integer> another = new LinkedHashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        // Configure the serde: TCTLSeparated format with a DDL describing the
        // three-field struct built above.
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        // Delimiters are given as decimal character codes: 9 is TAB (checked by
        // the assert on the primary separator below); 1, 2 and 4 are presumably
        // the matching control characters — TODO confirm against TCTLSeparatedProtocol.
        schema.setProperty(serdeConstants.FIELD_DELIM, "9");
        schema.setProperty(serdeConstants.COLLECTION_DELIM, "1");
        schema.setProperty(serdeConstants.LINE_DELIM, "2");
        schema.setProperty(serdeConstants.MAPKEY_DELIM, "4");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        // FIELD_DELIM "9" must have become a literal TAB character.
        TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol) serde.oprot_;
        assertTrue(prot.getPrimarySeparator().equals("	"));
        ObjectInspector oi = serde.getObjectInspector();
        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        hexString(bytes);
        // Expected on-the-wire text: fields joined by TAB, collection/map-key
        // entries joined by the (control-character) secondary separators.
        String compare = "234" + "	" + "firstString" + "" + "secondString" + "	" + "firstKey" + "" + "1" + "" + "secondKey" + "" + "2";
        System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()) + ">");
        System.out.println("compare to    =" + compare + ">");
        assertTrue(compare.equals(new String(bytes.get(), 0, bytes.getSize())));
        // Try to deserialize
        Object o = serde.deserialize(bytes);
        System.out.println("o class = " + o.getClass());
        List<?> olist = (List<?>) o;
        System.out.println("o size = " + olist.size());
        System.out.println("o[0] class = " + olist.get(0).getClass());
        System.out.println("o[1] class = " + olist.get(1).getClass());
        System.out.println("o[2] class = " + olist.get(2).getClass());
        System.out.println("o = " + o);
        // The round-trip must reproduce the original struct exactly.
        assertEquals(o, struct);
    } catch (Throwable e) {
        // Print before rethrowing so the failure is visible in the test log.
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 98 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From class TestDynamicSerDe, method testNulls1():

/**
   * Tests a single null list within a struct with return nulls on.
   */
public void testNulls1() throws Throwable {
    try {
        // Try to construct an object
        ArrayList<String> bye = null;
        HashMap<String, Integer> another = new HashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        ObjectInspector oi = serde.getObjectInspector();
        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        hexString(bytes);
        // Try to deserialize
        Object o = serde.deserialize(bytes);
        assertEquals(struct, o);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 99 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From class TestDynamicSerDe, method testNulls2():

/**
   * Tests all elements of a struct being null with return nulls on.
   */
/**
 * Tests all elements of a struct being null with return nulls on.
 */
public void testNulls2() throws Throwable {
    try {
        // A struct whose three members are all null.
        ArrayList<String> nullList = null;
        HashMap<String, Integer> nullMap = null;
        ArrayList<Object> row = new ArrayList<Object>();
        row.add(null);
        row.add(nullList);
        row.add(nullMap);
        // Serde configuration: TCTLSeparated protocol with nulls passed through.
        Properties props = new Properties();
        props.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        props.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        props.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        props.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        props.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), props);
        ObjectInspector inspector = serde.getObjectInspector();
        // Round-trip the all-null struct through the serde.
        BytesWritable serialized = (BytesWritable) serde.serialize(row, inspector);
        hexString(serialized);
        List<?> fields = (List<?>) serde.deserialize(serialized);
        assertTrue(fields.size() == 3);
        assertEquals(null, fields.get(0));
        assertEquals(null, fields.get(1));
        assertEquals(null, fields.get(2));
        // Cannot compare against the input struct directly because the types
        // of the null lists come back wrong.
    } catch (Throwable t) {
        // Log the stack trace before letting the test fail.
        t.printStackTrace();
        throw t;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 100 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From class TestDynamicSerDe, method testDynamicSerDe():

/**
 * Round-trips one six-field struct through DynamicSerDe under every
 * supported protocol and asserts the deserialized result equals the input.
 */
public void testDynamicSerDe() throws Throwable {
    try {
        // Input row: i32, list<string>, map<string,i32>, i32, double, double.
        ArrayList<String> strings = new ArrayList<String>();
        strings.add("firstString");
        strings.add("secondString");
        HashMap<String, Integer> keyCounts = new HashMap<String, Integer>();
        keyCounts.put("firstKey", 1);
        keyCounts.put("secondKey", 2);
        ArrayList<Object> row = new ArrayList<Object>();
        row.add(Integer.valueOf(234));
        row.add(strings);
        row.add(keyCounts);
        row.add(Integer.valueOf(-234));
        row.add(Double.valueOf(1.0));
        row.add(Double.valueOf(-2.5));
        // One entry per protocol under test:
        //   { protocol class name, is-binary flag, extra schema properties or null }.
        // TSimpleJSONProtocol is omitted because it does not support deserialization.
        Object[][] cases = new Object[][] {
            { org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName(), Boolean.TRUE, makeHashMap("serialization.sort.order", "++++++") },
            { org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName(), Boolean.TRUE, makeHashMap("serialization.sort.order", "------") },
            { org.apache.thrift.protocol.TBinaryProtocol.class.getName(), Boolean.TRUE, null },
            { org.apache.thrift.protocol.TJSONProtocol.class.getName(), Boolean.FALSE, null },
            // TCTLSeparatedProtocol is not done yet.
            { org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName(), Boolean.FALSE, null } };
        System.out.println("input struct = " + row);
        for (Object[] testCase : cases) {
            String protocol = (String) testCase[0];
            boolean isBinary = ((Boolean) testCase[1]).booleanValue();
            System.out.println("Testing protocol: " + protocol);
            // Base schema shared by every protocol.
            Properties schema = new Properties();
            schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, protocol);
            schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
            schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 _hello, list<string> 2bye, map<string,i32> another, i32 nhello, double d, double nd}");
            schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
            // Merge in any per-protocol extra properties.
            @SuppressWarnings("unchecked")
            HashMap<String, String> extras = (HashMap<String, String>) testCase[2];
            if (extras != null) {
                for (Entry<String, String> e : extras.entrySet()) {
                    schema.setProperty(e.getKey(), e.getValue());
                }
            }
            DynamicSerDe serde = new DynamicSerDe();
            serde.initialize(new Configuration(), schema);
            // Try getObjectInspector
            ObjectInspector oi = serde.getObjectInspector();
            System.out.println("TypeName = " + oi.getTypeName());
            // Serialize; only text protocols get their bytes echoed as a string.
            BytesWritable bytes = (BytesWritable) serde.serialize(row, oi);
            System.out.println("bytes =" + hexString(bytes));
            if (!isBinary) {
                System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()));
            }
            // Deserialize and verify the round trip reproduced the input.
            Object o = serde.deserialize(bytes);
            System.out.println("o class = " + o.getClass());
            List<?> olist = (List<?>) o;
            System.out.println("o size = " + olist.size());
            System.out.println("o[0] class = " + olist.get(0).getClass());
            System.out.println("o[1] class = " + olist.get(1).getClass());
            System.out.println("o[2] class = " + olist.get(2).getClass());
            System.out.println("o = " + o);
            assertEquals(row, o);
        }
    } catch (Throwable e) {
        // Log the stack trace before letting the test fail.
        e.printStackTrace();
        throw e;
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Aggregations

BytesWritable (org.apache.hadoop.io.BytesWritable)245 Text (org.apache.hadoop.io.Text)63 Test (org.junit.Test)51 LongWritable (org.apache.hadoop.io.LongWritable)46 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)45 IntWritable (org.apache.hadoop.io.IntWritable)36 IOException (java.io.IOException)34 Path (org.apache.hadoop.fs.Path)34 ArrayList (java.util.ArrayList)29 Configuration (org.apache.hadoop.conf.Configuration)28 Writable (org.apache.hadoop.io.Writable)27 BooleanWritable (org.apache.hadoop.io.BooleanWritable)25 FloatWritable (org.apache.hadoop.io.FloatWritable)24 Random (java.util.Random)23 List (java.util.List)22 SequenceFile (org.apache.hadoop.io.SequenceFile)22 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)21 FileSystem (org.apache.hadoop.fs.FileSystem)19 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)18 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)17