Example 36 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

From the class TestDeepParquetHiveMapInspector, method testHashMap:

@Test
public void testHashMap() {
    final Map<Writable, Writable> map = new HashMap<Writable, Writable>();
    map.put(new IntWritable(0), new IntWritable(1));
    map.put(new IntWritable(2), new IntWritable(3));
    map.put(new IntWritable(4), new IntWritable(5));
    map.put(new IntWritable(6), new IntWritable(7));
    assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new IntWritable(0)));
    assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new IntWritable(2)));
    assertEquals("Wrong result of inspection", new IntWritable(5), inspector.getMapValueElement(map, new IntWritable(4)));
    assertEquals("Wrong result of inspection", new IntWritable(7), inspector.getMapValueElement(map, new IntWritable(6)));
    assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new ShortWritable((short) 0)));
    assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new ShortWritable((short) 2)));
    assertEquals("Wrong result of inspection", new IntWritable(5), inspector.getMapValueElement(map, new ShortWritable((short) 4)));
    assertEquals("Wrong result of inspection", new IntWritable(7), inspector.getMapValueElement(map, new ShortWritable((short) 6)));
}
Also used: HashMap(java.util.HashMap) Writable(org.apache.hadoop.io.Writable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
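
The ShortWritable probes above succeed against IntWritable keys because the deep inspector matches keys by their underlying primitive value rather than by Writable equality; a plain HashMap lookup would miss, since the two types have different hashCode/equals. A minimal sketch of that idea, not Hive's actual implementation, handling only the two key types the test uses:

import java.util.Map;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class DeepLookupSketch {
    // Match map keys by numeric value rather than Writable equality, so a
    // ShortWritable((short) 2) probe finds an entry keyed by IntWritable(2).
    static Writable deepLookup(Map<Writable, Writable> map, Writable probe) {
        for (Map.Entry<Writable, Writable> e : map.entrySet()) {
            if (numericValue(e.getKey()) == numericValue(probe)) {
                return e.getValue();
            }
        }
        return null;
    }

    // Only the two key types used in the test are handled here.
    static long numericValue(Writable w) {
        if (w instanceof IntWritable) {
            return ((IntWritable) w).get();
        }
        if (w instanceof ShortWritable) {
            return ((ShortWritable) w).get();
        }
        throw new IllegalArgumentException("unsupported key type: " + w.getClass());
    }
}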

Example 37 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

From the class TestStandardParquetHiveMapInspector, method testRegularMap:

@Test
public void testRegularMap() {
    final Writable[] entry1 = new Writable[] { new IntWritable(0), new IntWritable(1) };
    final Writable[] entry2 = new Writable[] { new IntWritable(2), new IntWritable(3) };
    final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new Writable[] { new ArrayWritable(Writable.class, entry1), new ArrayWritable(Writable.class, entry2) });
    assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new IntWritable(0)));
    assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new IntWritable(2)));
    assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 0)));
    assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 2)));
}
Also used: ArrayWritable(org.apache.hadoop.io.ArrayWritable) Writable(org.apache.hadoop.io.Writable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
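
In contrast to Example 36, the standard inspector matches keys strictly, which is why the ShortWritable probes return null here. A hedged sketch of a strict lookup over the entry layout used above, where each map entry is a two-element [key, value] ArrayWritable; this illustrates the behavior and is not Hive's implementation:

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;

public class StrictLookupSketch {
    // Strict matching: a probe only hits when kv[0].equals(probe), and
    // IntWritable.equals(ShortWritable) is false, so mismatched key types miss.
    static Writable strictLookup(ArrayWritable map, Writable probe) {
        for (Writable entry : map.get()) {
            // Each entry is a two-element ArrayWritable: [key, value].
            Writable[] kv = ((ArrayWritable) entry).get();
            if (kv[0].equals(probe)) {
                return kv[1];
            }
        }
        return null;
    }
}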

Example 38 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

From the class TestLazyBinaryFast, method testLazyBinaryFast:

private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(writeColumnCount);
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        lazyBinarySerializeWrite.set(output);
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
        // column.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize, via the SerDe class, the Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyBinaryStruct lazyBinaryStruct;
        if (doWriteFewerColumns) {
            lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
        } else {
            lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
            Writable writable = (Writable) row[index];
            Object object = lazyBinaryStruct.getField(index);
            if (writable == null || object == null) {
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[writeColumnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazyBinary seems to work better with a row object array than with a Java object...
        for (int index = 0; index < writeColumnCount; index++) {
            serdeRow[index] = row[index];
        }
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
        }
        BytesWritable bytesWritable = new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
        byte[] bytes1 = bytesWritable.getBytes();
        BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
        byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
        if (bytes1.length != bytes2.length) {
            fail("SerializeWrite length " + bytes2.length + " and " + "SerDe serialization length " + bytes1.length + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize, via DeserializeRead, the Writable row objects created by the SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // When doWriteFewerColumns, try to read more fields than exist in buffer.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false);
        BytesWritable bytesWritable = serdeBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used: LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
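
Everything this test drives through SerializeWrite and DeserializeRead ultimately rests on the Writable contract: write() and readFields() must be inverses over the same byte stream. A minimal standalone sketch of that contract using core Hadoop buffer classes (the harness class WritableRoundTrip is illustrative, not part of the test):

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class WritableRoundTrip {
    public static void main(String[] args) throws Exception {
        // Serialize: every Writable knows how to write itself to a DataOutput.
        DataOutputBuffer out = new DataOutputBuffer();
        new IntWritable(42).write(out);

        // Deserialize: readFields() restores the value from the same bytes.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), 0, out.getLength());
        IntWritable copy = new IntWritable();
        copy.readFields(in);

        System.out.println(copy.get()); // 42
    }
}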

Example 39 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

From the class TestLazySimpleFast, method testLazySimpleFast:

private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
        lazySimpleSerializeWrite.set(output);
        for (int index = 0; index < columnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false, separator, serdeParams);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        byte[] bytes = bytesWritable.getBytes();
        int length = bytesWritable.getLength();
        lazySimpleDeserializeRead.set(bytes, 0, length);
        // Debug aid: expose the serialized bytes as chars; unused by the assertions below.
        char[] chars = new char[length];
        for (int c = 0; c < chars.length; c++) {
            chars[c] = (char) (bytes[c] & 0xFF);
        }
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize, via the SerDe class, the Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
        Object[] row = rows[i];
        for (int index = 0; index < columnCount; index++) {
            PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
            Writable writable = (Writable) row[index];
            LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
            Object object;
            if (lazyPrimitive != null) {
                object = lazyPrimitive.getWritableObject();
            } else {
                object = null;
            }
            if (writable == null || object == null) {
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    byte[][] serdeBytes = new byte[rowCount][];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[columnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazySimple seems to work better with a row object array than with a Java object...
        for (int index = 0; index < columnCount; index++) {
            serdeRow[index] = row[index];
        }
        Text serialized = (Text) serde.serialize(serdeRow, rowOI);
        byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
        byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match");
        }
        serdeBytes[i] = copyBytes(serialized);
    }
    // Try to deserialize, via DeserializeRead, the Writable row objects created by the SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false, separator, serdeParams);
        byte[] bytes = serdeBytes[i];
        lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used: Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) Text(org.apache.hadoop.io.Text) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output)
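
Unlike LazyBinary, LazySimple serializes each row as separator-delimited text, which is what the test's chars[] debug array makes visible. A small illustrative sketch of how such a row splits into fields, assuming a tab separator for readability (the class SplitLazySimpleRow and the sample row are hypothetical; the actual separator comes from serdeParams):

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

public class SplitLazySimpleRow {
    public static void main(String[] args) {
        // Hypothetical row: three fields separated by a tab byte.
        byte separator = '\t';
        byte[] row = "1\thello\t3.5".getBytes(StandardCharsets.UTF_8);
        List<String> fields = new ArrayList<>();
        int start = 0;
        for (int pos = 0; pos <= row.length; pos++) {
            // A field ends at each separator byte and at end of input.
            if (pos == row.length || row[pos] == separator) {
                fields.add(new String(row, start, pos - start, StandardCharsets.UTF_8));
                start = pos + 1;
            }
        }
        System.out.println(fields); // [1, hello, 3.5]
    }
}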

Example 40 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

From the class CreateSequenceFile, method main:

public static void main(String[] args) throws Exception {
    // Read parameters
    int lines = 10;
    List<String> extraArgs = new ArrayList<String>();
    for (int ai = 0; ai < args.length; ai++) {
        if (args[ai].equals("-line") && ai + 1 < args.length) {
            lines = Integer.parseInt(args[ai + 1]);
            ai++;
        } else {
            extraArgs.add(args[ai]);
        }
    }
    if (extraArgs.size() != 1) {
        usage();
    }
    JobConf conf = new JobConf(CreateSequenceFile.class);
    ThriftSerializer serializer = new ThriftSerializer();
    // Open files
    SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf, new Path(extraArgs.get(0)), BytesWritable.class, BytesWritable.class);
    // write to file
    BytesWritable key = new BytesWritable();
    Random rand = new Random(20081215);
    for (int i = 0; i < lines; i++) {
        ArrayList<Integer> alist = new ArrayList<Integer>();
        alist.add(i);
        alist.add(i * 2);
        alist.add(i * 3);
        ArrayList<String> slist = new ArrayList<String>();
        slist.add("" + i * 10);
        slist.add("" + i * 100);
        slist.add("" + i * 1000);
        ArrayList<IntString> islist = new ArrayList<IntString>();
        islist.add(new IntString(i * i, "" + i * i * i, i));
        HashMap<String, String> hash = new HashMap<String, String>();
        hash.put("key_" + i, "value_" + i);
        Map<String, Map<String, Map<String, PropValueUnion>>> unionMap = new HashMap<String, Map<String, Map<String, PropValueUnion>>>();
        Map<String, Map<String, PropValueUnion>> erMap = new HashMap<String, Map<String, PropValueUnion>>();
        Map<String, PropValueUnion> attrMap = new HashMap<String, PropValueUnion>();
        erMap.put("erVal" + i, attrMap);
        attrMap.put("value_" + i, PropValueUnion.doubleValue(1.0));
        unionMap.put("key_" + i, erMap);
        Complex complex = new Complex(rand.nextInt(), "record_" + i, alist, slist, islist, hash, unionMap, PropValueUnion.stringValue("test" + i), PropValueUnion.unionMStringString(hash), PropValueUnion.lString(slist));
        Writable value = serializer.serialize(complex);
        writer.append(key, value);
    }
    // Add an all-null record
    Complex complex = new Complex(0, null, null, null, null, null, null, null, null, null);
    Writable value = serializer.serialize(complex);
    writer.append(key, value);
    // Close files
    writer.close();
}
Also used: HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) PropValueUnion(org.apache.hadoop.hive.serde2.thrift.test.PropValueUnion) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) IntString(org.apache.hadoop.hive.serde2.thrift.test.IntString) Complex(org.apache.hadoop.hive.serde2.thrift.test.Complex) SequenceFile(org.apache.hadoop.io.SequenceFile) Random(java.util.Random) JobConf(org.apache.hadoop.mapred.JobConf) Path(org.apache.hadoop.fs.Path) Map(java.util.Map)
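
A natural companion is reading the file back. A brief sketch using SequenceFile.Reader; the class name ReadSequenceFile and the record-count output are illustrative, but the (FileSystem, Path, Configuration) Reader constructor and the next(key, value) loop are the standard Hadoop API pairing with the Writer used above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;

public class ReadSequenceFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Same (fs, path, conf) style as the Writer constructor in the example above.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(args[0]), conf);
        BytesWritable key = new BytesWritable();
        BytesWritable value = new BytesWritable();
        int records = 0;
        // next() fills key and value in place and returns false at end of file.
        while (reader.next(key, value)) {
            records++;
        }
        reader.close();
        System.out.println("records read: " + records);
    }
}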

Aggregations

Writable (org.apache.hadoop.io.Writable): 221 uses
IntWritable (org.apache.hadoop.io.IntWritable): 103 uses
LongWritable (org.apache.hadoop.io.LongWritable): 91 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 75 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 74 uses
FloatWritable (org.apache.hadoop.io.FloatWritable): 73 uses
Test (org.junit.Test): 68 uses
IOException (java.io.IOException): 43 uses
Path (org.apache.hadoop.fs.Path): 43 uses
Text (org.apache.hadoop.io.Text): 40 uses
ArrayWritable (org.apache.hadoop.io.ArrayWritable): 37 uses
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 34 uses
SequenceFile (org.apache.hadoop.io.SequenceFile): 32 uses
Configuration (org.apache.hadoop.conf.Configuration): 31 uses
DoubleWritable (org.apache.hadoop.io.DoubleWritable): 30 uses
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 29 uses
ByteWritable (org.apache.hadoop.io.ByteWritable): 28 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 25 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 24 uses
ArrayList (java.util.ArrayList): 23 uses