
Example 1 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache: class TestVectorSerDeRow, method testVectorSerializeRow.

void testVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
    String[] emptyScratchTypeNames = new String[0];
    VectorRandomRowSource source = new VectorRandomRowSource();
    source.init(r);
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorAssignRow vectorAssignRow = new VectorAssignRow();
    vectorAssignRow.init(source.typeNames());
    int fieldCount = source.typeNames().size();
    DeserializeRead deserializeRead;
    SerializeWrite serializeWrite;
    switch (serializationType) {
        case BINARY_SORTABLE:
            deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
            serializeWrite = new BinarySortableSerializeWrite(fieldCount);
            break;
        case LAZY_BINARY:
            deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
            serializeWrite = new LazyBinarySerializeWrite(fieldCount);
            break;
        case LAZY_SIMPLE:
            {
                StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
                LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector);
                byte separator = (byte) '\t';
                deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false, separator, lazySerDeParams);
                serializeWrite = new LazySimpleSerializeWrite(fieldCount, separator, lazySerDeParams);
            }
            break;
        default:
            throw new Error("Unknown serialization type " + serializationType);
    }
    VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite);
    vectorSerializeRow.init(source.typeNames());
    Object[][] randomRows = source.randomRows(100000);
    int firstRandomRowIndex = 0;
    for (int i = 0; i < randomRows.length; i++) {
        Object[] row = randomRows[i];
        vectorAssignRow.assignRow(batch, batch.size, row);
        batch.size++;
        if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
            // A full batch: serialize it, verify it against the source rows,
            // then reset for the next chunk.
            serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
            firstRandomRowIndex = i + 1;
            batch.reset();
        }
    }
    if (batch.size > 0) {
        serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
    }
}
Also used: LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DeserializeRead(org.apache.hadoop.hive.serde2.fast.DeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
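SerDeException is the checked exception that every Hive SerDe entry point declares, which is why the test method above carries it in its throws clause. As a minimal sketch of how a caller typically handles it at an operator boundary (the deserializeOrFail helper below is hypothetical, not part of the Hive API):

import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.io.Writable;

class SerDeCallSketch {
    // SerDeException is checked: callers either propagate it or translate it
    // into an unchecked failure at the boundary, keeping the cause attached.
    static Object deserializeOrFail(AbstractSerDe serDe, Writable raw) {
        try {
            return serDe.deserialize(raw);
        } catch (SerDeException e) {
            throw new RuntimeException("Row deserialization failed", e);
        }
    }
}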

Example 2 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache: class TestRCFile, method writeTest.

private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData, Configuration conf) throws IOException, SerDeException {
    cleanup();
    RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < count; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("RCFile with " + bytes.size() + " columns and " + count + " rows has file size " + fileLen + " bytes");
}
Also used: BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
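The write path above has a matching read path. The sketch below reads the appended rows back with the same RCFile.Reader API that the later examples on this page exercise; readBack is a hypothetical helper, and fs, file, and conf are assumed to be the same objects the writer used:

private void readBack(FileSystem fs, Path file, Configuration conf) throws IOException {
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
        // Each iteration materializes one row as column-wise byte ranges.
        reader.getCurrentRow(cols);
    }
    reader.close();
}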

Example 3 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache: class TestRCFile, method testSimpleReadAndWrite.

@Test
public void testSimpleReadAndWrite() throws IOException, SerDeException {
    cleanup();
    byte[][] record_1 = {
        "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"),
        "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"),
        "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
    byte[][] record_2 = {
        "100".getBytes("UTF-8"), "200".getBytes("UTF-8"), "123".getBytes("UTF-8"),
        "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"),
        "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
        RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")),
        new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();
    Object[] expectedRecord_1 = {
        new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789),
        new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"),
        null, null };
    Object[] expectedRecord_2 = {
        new ByteWritable((byte) 100), new ShortWritable((short) 200), new IntWritable(123),
        new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"),
        null, null };
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();
    for (int i = 0; i < 2; i++) {
        reader.next(rowID);
        BytesRefArrayWritable cols = new BytesRefArrayWritable();
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int j = 0; j < fieldRefs.size(); j++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            if (i == 0) {
                assertEquals("Field " + j, expectedRecord_1[j], standardWritableData);
            } else {
                assertEquals("Field " + j, expectedRecord_2[j], standardWritableData);
            }
        }
    }
    reader.close();
}
Also used: BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) RecordReader(org.apache.hadoop.mapred.RecordReader) Text(org.apache.hadoop.io.Text) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
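The read loop above follows the general SerDe access pattern: deserialize() returns an opaque row object, and the SerDe's StructObjectInspector is the supported way to extract fields from it. Condensed to its core (serDe and row as in the test; purely illustrative):

// Generic field-access pattern for a Hive SerDe: do not cast the row object
// directly, always traverse it through the ObjectInspector.
StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
for (StructField fieldRef : oi.getAllStructFieldRefs()) {
    Object fieldData = oi.getStructFieldData(row, fieldRef);
    Object writable = ObjectInspectorUtils.copyToStandardObject(
        fieldData, fieldRef.getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
    // writable is now a standard Writable (IntWritable, Text, ...) regardless
    // of the SerDe's internal lazy representation.
}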

Example 4 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache: class TestRCFile, method partialReadTest.

private void partialReadTest(FileSystem fs, int count, Path file) throws IOException, SerDeException {
    LOG.debug("reading " + count + " records");
    long start = System.currentTimeMillis();
    java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
    readCols.add(2);
    readCols.add(3);
    ColumnProjectionUtils.appendReadColumns(conf, readCols);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int i : readCols) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(i).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            assertEquals("Field " + i, standardWritableData, expectedPartitalFieldsData[i]);
        }
        assertEquals("Class of the serialized object should be BytesRefArrayWritable", BytesRefArrayWritable.class, serDe.getSerializedClass());
        BytesRefArrayWritable serializedBytes = (BytesRefArrayWritable) serDe.serialize(row, oi);
        assertEquals("Serialized data", patialS, serializedBytes);
    }
    reader.close();
    long cost = System.currentTimeMillis() - start;
    LOG.debug("partial reading costs: " + cost + " milliseconds");
}
Also used: BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) RecordReader(org.apache.hadoop.mapred.RecordReader) LongWritable(org.apache.hadoop.io.LongWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
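What makes this a partial read is the ColumnProjectionUtils call before the reader is opened: the projected column ids travel in the Configuration, and RCFile.Reader then skips decompressing the unprojected columns. The setup in isolation, using only the calls already shown above:

// Project only columns 2 and 3; the other columns' bytes are never
// decompressed by the reader.
java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
readCols.add(2);
readCols.add(3);
ColumnProjectionUtils.appendReadColumns(conf, readCols);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);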

Example 5 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache: class LazyBinarySerDe, method deserialize.

/**
 * Deserialize a table record to a lazybinary struct.
 */
@Override
public Object deserialize(Writable field) throws SerDeException {
    if (byteArrayRef == null) {
        byteArrayRef = new ByteArrayRef();
    }
    BinaryComparable b = (BinaryComparable) field;
    if (b.getLength() == 0) {
        // A zero-length record deserializes to a SQL NULL row.
        return null;
    }
    // Point the reusable cached struct at this record's bytes; individual
    // fields are decoded lazily on access.
    byteArrayRef.setData(b.getBytes());
    cachedLazyBinaryStruct.init(byteArrayRef, 0, b.getLength());
    lastOperationSerialize = false;
    lastOperationDeserialize = true;
    return cachedLazyBinaryStruct;
}
Also used: BinaryComparable(org.apache.hadoop.io.BinaryComparable) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)
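To place deserialize() in context: a SerDe is first initialized with the table's column names and types, after which serialize and deserialize act as inverses. A minimal round-trip sketch, assuming the classic two-argument initialize(Configuration, Properties) signature (newer Hive versions add a third, partition-properties argument); the property keys are the values of serdeConstants.LIST_COLUMNS and serdeConstants.LIST_COLUMN_TYPES:

void roundTripSketch() throws SerDeException {
    LazyBinarySerDe serDe = new LazyBinarySerDe();
    Properties tbl = new Properties();
    // Column layout of the table this SerDe will read and write.
    tbl.setProperty("columns", "a,b");
    tbl.setProperty("columns.types", "int,string");
    serDe.initialize(new Configuration(), tbl);

    // The inspector describes the rows deserialize() returns. A full round
    // trip would be:
    //   Writable blob = serDe.serialize(rowObject, rowObjectInspector);
    //   Object row = serDe.deserialize(blob);
    ObjectInspector rowOI = serDe.getObjectInspector();
}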

Aggregations

SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 117
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 101
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 97
ArrayList (java.util.ArrayList): 93
Test (org.junit.Test): 58
Properties (java.util.Properties): 55
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 50
Text (org.apache.hadoop.io.Text): 49
Configuration (org.apache.hadoop.conf.Configuration): 48
IOException (java.io.IOException): 36
Schema (org.apache.avro.Schema): 31
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 31
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 31
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 28
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 26
Put (org.apache.hadoop.hbase.client.Put): 22
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 22
IntWritable (org.apache.hadoop.io.IntWritable): 22
KeyValue (org.apache.hadoop.hbase.KeyValue): 20
Result (org.apache.hadoop.hbase.client.Result): 20