
Example 46 with Writable

use of org.apache.hadoop.io.Writable in project hive by apache.

the class TestVectorMapJoinFastRowHashMap method addAndVerifyRows.

private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows, VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType, VerifyFastRowHashMap verifyTable, String[] keyTypeNames, boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
    final int keyCount = keyTypeNames.length;
    PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
    PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
    ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList = new ArrayList<ObjectInspector>(keyCount);
    for (int i = 0; i < keyCount; i++) {
        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
        keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
        PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        keyPrimitiveCategories[i] = primitiveCategory;
        keyPrimitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
    }
    // Keys use BinarySortable encoding: every column ascending, with the serde's standard null / not-null markers.
    boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
    Arrays.fill(keyColumnSortOrderIsDesc, false);
    byte[] keyColumnNullMarker = new byte[keyCount];
    Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
    byte[] keyColumnNotNullMarker = new byte[keyCount];
    Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
    BinarySortableSerializeWrite keySerializeWrite = new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc, keyColumnNullMarker, keyColumnNotNullMarker);
    PrimitiveTypeInfo[] valuePrimitiveTypeInfos = valueSource.primitiveTypeInfos();
    final int columnCount = valuePrimitiveTypeInfos.length;
    // Values use LazyBinary encoding.
    SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
    final int count = rows.length;
    for (int i = 0; i < count; i++) {
        Object[] valueRow = rows[i];
        Output valueOutput = new Output();
        ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
        for (int index = 0; index < columnCount; index++) {
            Writable writable = (Writable) valueRow[index];
            VerifyFastRow.serializeWrite(valueSerializeWrite, valuePrimitiveTypeInfos[index], writable);
        }
        byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
        // Add a new key or add a value to an existing key?
        byte[] key;
        if (random.nextBoolean() || verifyTable.getCount() == 0) {
            Object[] keyRow = VectorRandomRowSource.randomRow(keyCount, random, keyPrimitiveObjectInspectorList, keyPrimitiveCategories, keyPrimitiveTypeInfos);
            Output keyOutput = new Output();
            keySerializeWrite.set(keyOutput);
            for (int index = 0; index < keyCount; index++) {
                Writable writable = (Writable) keyRow[index];
                VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], writable);
            }
            key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
            verifyTable.add(key, keyRow, value, valueRow);
        } else {
            key = verifyTable.addRandomExisting(value, valueRow, random);
        }
        // Wrap the serialized key and value bytes and insert the pair into the map under test.
        BytesWritable keyWritable = new BytesWritable(key);
        BytesWritable valueWritable = new BytesWritable(value);
        map.putRow(keyWritable, valueWritable);
    // verifyTable.verify(map);
    }
    verifyTable.verify(map, hashTableKeyType, valuePrimitiveTypeInfos, doClipping, useExactBytes, random);
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite)
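
Stripped of the test harness, the value-serialization pattern above reduces to a few lines. A minimal sketch, assuming a typeInfos array and a matching row of Writables; the helper name serializeRow is hypothetical, while VerifyFastRow.serializeWrite is the same test utility the example calls:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;

// Hypothetical helper mirroring the value loop of addAndVerifyRows.
static BytesWritable serializeRow(PrimitiveTypeInfo[] typeInfos, Writable[] row) throws Exception {
    LazyBinarySerializeWrite write = new LazyBinarySerializeWrite(row.length);
    Output output = new Output();
    // All subsequent field writes append to this output buffer.
    write.set(output);
    for (int i = 0; i < row.length; i++) {
        VerifyFastRow.serializeWrite(write, typeInfos[i], row[i]);
    }
    // The backing buffer is oversized; copy out only the bytes actually written.
    byte[] bytes = Arrays.copyOf(output.getData(), output.getLength());
    return new BytesWritable(bytes);
}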

Example 47 with Writable

use of org.apache.hadoop.io.Writable in project hive by apache.

the class CheckFastRowHashMap method verifyHashMapRows.

public static void verifyHashMapRows(List<Object[]> rows, int[] actualToValueMap, VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos) throws IOException {
    final int count = rows.size();
    final int columnCount = typeInfos.length;
    WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
    for (int a = 0; a < count; a++) {
        int valueIndex = actualToValueMap[a];
        Object[] row = rows.get(valueIndex);
        byte[] bytes = ref.getBytes();
        int offset = (int) ref.getOffset();
        int length = ref.getLength();
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        lazyBinaryDeserializeRead.set(bytes, offset, length);
        for (int index = 0; index < columnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFastRow.verifyDeserializeRead(lazyBinaryDeserializeRead, (PrimitiveTypeInfo) typeInfos[index], writable);
        }
        TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        ref = hashMapResult.next();
        if (a == count - 1) {
            TestCase.assertNull(ref);
        } else {
            TestCase.assertNotNull(ref);
        }
    }
}
Also used : ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) WriteBuffers(org.apache.hadoop.hive.serde2.WriteBuffers) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
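
Every example here leans on the same two-method contract: a Writable serializes itself to a DataOutput and rehydrates from a DataInput, in the same field order. For reference, a minimal custom implementation; PairWritable is a hypothetical type, not part of Hadoop or Hive:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

// Hypothetical Writable carrying two fixed-width fields.
public class PairWritable implements Writable {
    private int left;
    private long right;

    @Override
    public void write(DataOutput out) throws IOException {
        // The write order defines the wire format; readFields must mirror it exactly.
        out.writeInt(left);
        out.writeLong(right);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        left = in.readInt();
        right = in.readLong();
    }
}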

Example 48 with Writable

use of org.apache.hadoop.io.Writable in project hive by apache.

the class TestParquetSerDe method testParquetHiveSerDe.

public void testParquetHiveSerDe() throws Throwable {
    try {
        // Create the SerDe
        System.out.println("test: testParquetHiveSerDe");
        final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
        final Configuration conf = new Configuration();
        final Properties tbl = createProperties();
        SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
        // Data
        final Writable[] arr = new Writable[9];
        // Primitive columns; string and binary values both travel as BytesWritable.
        arr[0] = new ByteWritable((byte) 123);
        arr[1] = new ShortWritable((short) 456);
        arr[2] = new IntWritable(789);
        arr[3] = new LongWritable(1000L);
        arr[4] = new DoubleWritable(5.3);
        arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
        arr[6] = new BytesWritable("parquetSerde binary".getBytes("UTF-8"));
        // Map column: three entries, each encoded as a two-element [key, value] ArrayWritable.
        final Writable[] map = new Writable[3];
        for (int i = 0; i < 3; ++i) {
            final Writable[] pair = new Writable[2];
            pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
            pair[1] = new IntWritable(i);
            map[i] = new ArrayWritable(Writable.class, pair);
        }
        arr[7] = new ArrayWritable(Writable.class, map);
        // List column: a flat ArrayWritable of five elements.
        final Writable[] array = new Writable[5];
        for (int i = 0; i < 5; ++i) {
            array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
        }
        arr[8] = new ArrayWritable(Writable.class, array);
        final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);
        // Test
        deserializeAndSerializeLazySimple(serDe, arrWritable);
        System.out.println("test: testParquetHiveSerDe - OK");
    } catch (final Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Configuration(org.apache.hadoop.conf.Configuration) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IntWritable(org.apache.hadoop.io.IntWritable) Properties(java.util.Properties)
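
The nested ArrayWritable layout above is how these tests model complex columns for the Parquet SerDe: a map becomes an array of two-element [key, value] pairs and a list becomes a flat array. A minimal sketch of a single map entry, with hypothetical contents:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

// Hypothetical map entry "color" -> 7, encoded as a two-element pair.
Writable[] pair = new Writable[] {
    new BytesWritable("color".getBytes(StandardCharsets.UTF_8)),
    new IntWritable(7)
};
ArrayWritable entry = new ArrayWritable(Writable.class, pair);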

Example 49 with Writable

use of org.apache.hadoop.io.Writable in project hive by apache.

the class TestAvroSerializer method canSerializeStructs.

@Test
public void canSerializeStructs() throws SerDeException {
    String field = "{ \"name\":\"struct1\", \"type\":{\"type\":\"record\", " + "\"name\":\"struct1_name\", \"fields\": [\n" + "{ \"name\":\"sInt\", \"type\":\"int\" }, { \"name\"" + ":\"sBoolean\", \"type\":\"boolean\" }, { \"name\":\"sString\", \"type\":\"string\" } ] } }";
    Schema s = buildSchema(field);
    GenericData.Record innerRecord = new GenericData.Record(s.getField("struct1").schema());
    innerRecord.put("sInt", 77);
    innerRecord.put("sBoolean", false);
    innerRecord.put("sString", "tedious");
    GenericData.Record r = new GenericData.Record(s);
    r.put("struct1", innerRecord);
    AvroSerializer as = new AvroSerializer();
    AvroDeserializer ad = new AvroDeserializer();
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    ObjectInspector oi = aoig.getObjectInspector();
    List<String> columnNames = aoig.getColumnNames();
    List<TypeInfo> columnTypes = aoig.getColumnTypes();
    AvroGenericRecordWritable agrw = new AvroGenericRecordWritable(r);
    agrw.setFileSchema(r.getSchema());
    Object obj = ad.deserialize(columnNames, columnTypes, agrw, s);
    Writable result = as.serialize(obj, oi, columnNames, columnTypes, s);
    assertTrue(result instanceof AvroGenericRecordWritable);
    GenericRecord r2 = ((AvroGenericRecordWritable) result).getRecord();
    assertEquals(s, r2.getSchema());
    GenericRecord r3 = (GenericRecord) r2.get("struct1");
    assertEquals(77, r3.get("sInt"));
    assertEquals(false, r3.get("sBoolean"));
    assertEquals("tedious", r3.get("sString"));
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Schema(org.apache.avro.Schema) Writable(org.apache.hadoop.io.Writable) GenericData(org.apache.avro.generic.GenericData) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)
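
The field fragment above is presumably wrapped into a complete record schema by the test's buildSchema helper (not shown here). A hedged reconstruction of what that full schema would look like, parsed directly with the Avro API; the outer record name test_record is an assumption:

import org.apache.avro.Schema;

// Hedged reconstruction of the schema buildSchema would produce for 'field'.
Schema schema = new Schema.Parser().parse(
    "{ \"type\":\"record\", \"name\":\"test_record\", \"fields\": ["
  + "  { \"name\":\"struct1\", \"type\": { \"type\":\"record\","
  + "    \"name\":\"struct1_name\", \"fields\": ["
  + "      { \"name\":\"sInt\", \"type\":\"int\" },"
  + "      { \"name\":\"sBoolean\", \"type\":\"boolean\" },"
  + "      { \"name\":\"sString\", \"type\":\"string\" } ] } } ] }");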

Example 50 with Writable

use of org.apache.hadoop.io.Writable in project hive by apache.

the class TestAvroSerializer method serializeAndDeserialize.

/**
   * Verify that we can serialize an Avro value by taking one, running it through
   * the deserialization process, and then serializing it again.
   */
private GenericRecord serializeAndDeserialize(String recordValue, String fieldName, Object fieldValue) throws SerDeException, IOException {
    Schema s = buildSchema(recordValue);
    GenericData.Record r = new GenericData.Record(s);
    r.put(fieldName, fieldValue);
    AvroSerializer as = new AvroSerializer();
    AvroDeserializer ad = new AvroDeserializer();
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    ObjectInspector oi = aoig.getObjectInspector();
    List<String> columnNames = aoig.getColumnNames();
    List<TypeInfo> columnTypes = aoig.getColumnTypes();
    AvroGenericRecordWritable agrw = Utils.serializeAndDeserializeRecord(r);
    Object obj = ad.deserialize(columnNames, columnTypes, agrw, s);
    Writable result = as.serialize(obj, oi, columnNames, columnTypes, s);
    assertTrue(result instanceof AvroGenericRecordWritable);
    GenericRecord r2 = ((AvroGenericRecordWritable) result).getRecord();
    assertEquals(s, r2.getSchema());
    return r2;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Schema(org.apache.avro.Schema) Writable(org.apache.hadoop.io.Writable) GenericData(org.apache.avro.generic.GenericData) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericRecord(org.apache.avro.generic.GenericRecord)
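
A hedged usage sketch of the helper: callers in this test class pass a single-field JSON fragment, the field name, and a value, then assert on the returned record. The field definition below is illustrative, not taken from the test:

// Hypothetical call site for a single int field.
String field = "{ \"name\":\"counter\", \"type\":\"int\" }";
GenericRecord r2 = serializeAndDeserialize(field, "counter", 42);
assertEquals(42, r2.get("counter"));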

Aggregations

Writable (org.apache.hadoop.io.Writable): 221 uses
IntWritable (org.apache.hadoop.io.IntWritable): 103 uses
LongWritable (org.apache.hadoop.io.LongWritable): 91 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 75 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 74 uses
FloatWritable (org.apache.hadoop.io.FloatWritable): 73 uses
Test (org.junit.Test): 68 uses
IOException (java.io.IOException): 43 uses
Path (org.apache.hadoop.fs.Path): 43 uses
Text (org.apache.hadoop.io.Text): 40 uses
ArrayWritable (org.apache.hadoop.io.ArrayWritable): 37 uses
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 34 uses
SequenceFile (org.apache.hadoop.io.SequenceFile): 32 uses
Configuration (org.apache.hadoop.conf.Configuration): 31 uses
DoubleWritable (org.apache.hadoop.io.DoubleWritable): 30 uses
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 29 uses
ByteWritable (org.apache.hadoop.io.ByteWritable): 28 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 25 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 24 uses
ArrayList (java.util.ArrayList): 23 uses