Search in sources:

Example 21 with LazyStruct

use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.

Class TestLazySimpleFast, method testLazySimpleFast.

/**
 * Cross-checks the fast LazySimple writer/reader against {@code LazySimpleSerDe} for every row.
 *
 * <p>The check runs in four phases, each over all rows:
 * <ol>
 *   <li>serialize each row with {@code LazySimpleSerializeWrite};</li>
 *   <li>read those bytes back with {@code LazySimpleDeserializeRead} and verify each column;</li>
 *   <li>deserialize the same bytes with {@code serde} and compare field by field;</li>
 *   <li>serialize each row with {@code serde}, require byte-for-byte equality with phase 1,
 *       then read the SerDe bytes back with {@code LazySimpleDeserializeRead}.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code source}, {@code serde_fewer} and {@code serdeParams_fewer} are not
 * referenced anywhere in this method body.
 *
 * @param source not used by this method
 * @param rows the rows to round-trip; each row supplies one value per entry of {@code typeInfos}
 * @param serde SerDe used for the SerDe-side serialize and deserialize calls
 * @param rowOI object inspector handed to {@code serde.serialize}
 * @param serde_fewer not used by this method
 * @param writeRowOI consulted only when {@code doWriteFewerColumns} is set, for its field count
 * @param serdeParams parameters handed to the fast writer and reader
 * @param serdeParams_fewer not used by this method
 * @param typeInfos the type of each column
 * @param useIncludeColumns when set, a random subset of columns is skipped while reading
 * @param doWriteFewerColumns when set, the reader is built with only the leading
 *        {@code writeRowOI} field count of types and the remaining columns are verified as null
 * @param r randomness source for the include-column mask
 * @throws Throwable whatever the serialization, deserialization or verification helpers throw
 */
private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, TypeInfo[] typeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    final int rowCount = rows.length;
    final int columnCount = typeInfos.length;

    // Random include mask; stays null when column inclusion is not being exercised.
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int col = 0; col < columnCount; col++) {
            columnsToInclude[col] = r.nextBoolean();
        }
    }

    // The reader may be configured with fewer leading columns than were written.
    final int writeColumnCount = doWriteFewerColumns ? writeRowOI.getAllStructFieldRefs().size() : columnCount;
    final TypeInfo[] writeTypeInfos = doWriteFewerColumns ? Arrays.copyOf(typeInfos, writeColumnCount) : typeInfos;

    // Phase 1: serialize every row with the fast writer.
    final BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int rowNum = 0; rowNum < rowCount; rowNum++) {
        final Object[] row = rows[rowNum];
        final Output buffer = new Output();
        final LazySimpleSerializeWrite writer = new LazySimpleSerializeWrite(columnCount, serdeParams);
        writer.set(buffer);
        for (int col = 0; col < columnCount; col++) {
            VerifyFast.serializeWrite(writer, typeInfos[col], row[col]);
        }
        final BytesWritable written = new BytesWritable();
        written.set(buffer.getData(), 0, buffer.getLength());
        serializeWriteBytes[rowNum] = written;
    }

    // Phase 2: read the fast-writer bytes back with the fast reader.
    for (int rowNum = 0; rowNum < rowCount; rowNum++) {
        final Object[] row = rows[rowNum];
        final LazySimpleDeserializeRead reader = new LazySimpleDeserializeRead(writeTypeInfos, /* useExternalBuffer */ false, serdeParams);
        final BytesWritable written = serializeWriteBytes[rowNum];
        final byte[] backing = written.getBytes();
        final int usedLength = written.getLength();
        reader.set(backing, 0, usedLength);
        for (int col = 0; col < columnCount; col++) {
            final boolean excluded = useIncludeColumns && !columnsToInclude[col];
            if (excluded) {
                reader.skipNextField();
            } else if (col < writeColumnCount) {
                verifyRead(reader, typeInfos[col], row[col]);
            } else {
                // Columns beyond the reader's type list should come back as null.
                verifyReadNull(reader, typeInfos[col]);
            }
        }
        if (writeColumnCount == columnCount) {
            assertTrue(reader.isEndOfInputReached());
        }
    }

    // Phase 3: hand the fast-writer bytes to the SerDe and compare each lazily deserialized field.
    for (int rowNum = 0; rowNum < rowCount; rowNum++) {
        final LazyStruct struct = (LazyStruct) serde.deserialize(serializeWriteBytes[rowNum]);
        final Object[] row = rows[rowNum];
        for (int col = 0; col < columnCount; col++) {
            final TypeInfo typeInfo = typeInfos[col];
            final Object expected = row[col];
            final Object actual = struct.getField(col);
            final boolean expectedIsNull = expected == null;
            final boolean actualIsNull = actual == null;
            if (expectedIsNull != actualIsNull) {
                fail("SerDe deserialized NULL column mismatch");
            } else if (!expectedIsNull && !VerifyLazy.lazyCompare(typeInfo, actual, expected)) {
                fail("SerDe deserialized value does not match");
            }
        }
    }

    // Phase 4a: serialize with the SerDe; its output must equal the fast writer's bytes.
    final byte[][] serdeBytes = new byte[rowCount][];
    // A single scratch array is reused for every row passed to the SerDe.
    final Object[] serdeRow = new Object[columnCount];
    for (int rowNum = 0; rowNum < rowCount; rowNum++) {
        System.arraycopy(rows[rowNum], 0, serdeRow, 0, columnCount);
        final Text serialized = (Text) serde.serialize(serdeRow, rowOI);
        final BytesWritable fastWritten = serializeWriteBytes[rowNum];
        // Trim both backing arrays to their used length before comparing.
        final byte[] fromSerde = Arrays.copyOf(serialized.getBytes(), serialized.getLength());
        final byte[] fromFastWriter = Arrays.copyOf(fastWritten.getBytes(), fastWritten.getLength());
        if (!Arrays.equals(fromSerde, fromFastWriter)) {
            fail("SerializeWrite and SerDe serialization does not match");
        }
        serdeBytes[rowNum] = copyBytes(serialized);
    }

    // Phase 4b: read the SerDe-produced bytes back with the fast reader.
    for (int rowNum = 0; rowNum < rowCount; rowNum++) {
        final Object[] row = rows[rowNum];
        final LazySimpleDeserializeRead reader = new LazySimpleDeserializeRead(writeTypeInfos, /* useExternalBuffer */ false, serdeParams);
        final byte[] rowBytes = serdeBytes[rowNum];
        reader.set(rowBytes, 0, rowBytes.length);
        for (int col = 0; col < columnCount; col++) {
            final boolean excluded = useIncludeColumns && !columnsToInclude[col];
            if (excluded) {
                reader.skipNextField();
            } else if (col < writeColumnCount) {
                verifyRead(reader, typeInfos[col], row[col]);
            } else {
                // Columns beyond the reader's type list should come back as null.
                verifyReadNull(reader, typeInfos[col]);
            }
        }
        if (writeColumnCount == columnCount) {
            assertTrue(reader.isEndOfInputReached());
        }
    }
}
Also used : LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject)

Example 22 with LazyStruct

use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.

Class LazySimpleSerDe, method initialize.

/**
 * Initializes this SerDe from the supplied configuration and properties.
 *
 * <p>Recognized settings:
 * <ul>
 *   <li>{@code serialization.format}: separator char or byte code (only byte values up to 127
 *       are supported);</li>
 *   <li>{@code columns}: ","-separated column names;</li>
 *   <li>{@code columns.types}: ",", ":", or ";"-separated column types.</li>
 * </ul>
 *
 * <p>After delegating to the superclass, this rebuilds the SerDe parameters, the cached struct
 * object inspector and the cached lazy struct, and resets the size, stats and
 * last-operation bookkeeping fields.
 *
 * @param configuration passed to the superclass and to the SerDe parameters
 * @param tableProperties passed to the superclass initializer
 * @param partitionProperties passed to the superclass initializer
 * @throws SerDeException declared by the overridden method; raised by the calls made here on failure
 */
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
    super.initialize(configuration, tableProperties, partitionProperties);

    // Parameters are built from this.properties rather than from the raw arguments.
    this.serdeParams = new LazySerDeParameters(configuration, this.properties, getClass().getName());

    // Build the struct inspector for the row's fields, then the reusable lazy row object on top of it.
    this.cachedObjectInspector = LazyFactory.createLazyStructInspector(
            this.serdeParams.getColumnNames(),
            this.serdeParams.getColumnTypes(),
            new LazyObjectInspectorParametersImpl(this.serdeParams));
    this.cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(this.cachedObjectInspector);

    // Reset bookkeeping state.
    this.serializedSize = 0;
    this.stats = new SerDeStats();
    this.lastOperationSerialize = false;
    this.lastOperationDeserialize = false;
}
Also used : SerDeStats(org.apache.hadoop.hive.serde2.SerDeStats) LazyObjectInspectorParametersImpl(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl)

Example 23 with LazyStruct

use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.

Class TestAvroLazyObjectInspector, method testEmptyData.

/**
 * Checks that {@code AvroLazyObjectInspector.getStructFieldData} returns null for a
 * {@code LazyStruct} initialized over a zero-length byte array.
 */
@Test
public void testEmptyData() {
    final byte zeroByte = (byte) 0;

    // One string column named "myField".
    List<String> columnNames = new ArrayList<>();
    columnNames.add("myField");
    List<ObjectInspector> columnInspectors = new ArrayList<>();
    columnInspectors.add(LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector(false, Byte.valueOf(zeroByte)));

    AvroLazyObjectInspector avroInspector = new AvroLazyObjectInspector(columnNames, columnInspectors, null, zeroByte, new Text(), false, false, zeroByte);
    LazyStruct emptyStruct = new LazyStruct(LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columnNames, columnInspectors, zeroByte, new Text(), false, false, zeroByte));

    // Explicitly back the struct with an empty buffer: offset 0, length 0.
    ByteArrayRef emptyRef = new ByteArrayRef();
    emptyRef.setData(new byte[0]);
    emptyStruct.init(emptyRef, 0, 0);

    assertNull(avroInspector.getStructFieldData(emptyStruct, new TestStructField()));
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Test(org.junit.Test)

Aggregations

LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct)14 Configuration (org.apache.hadoop.conf.Configuration)11 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)11 Test (org.junit.Test)11 Properties (java.util.Properties)10 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)10 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)9 Text (org.apache.hadoop.io.Text)9 Mutation (org.apache.accumulo.core.data.Mutation)8 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)8 ArrayList (java.util.ArrayList)7 Entry (java.util.Map.Entry)6 Connector (org.apache.accumulo.core.client.Connector)4 Instance (org.apache.accumulo.core.client.Instance)4 MockInstance (org.apache.accumulo.core.client.mock.MockInstance)4 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)4 ColumnUpdate (org.apache.accumulo.core.data.ColumnUpdate)4 Key (org.apache.accumulo.core.data.Key)4 Value (org.apache.accumulo.core.data.Value)4 Authorizations (org.apache.accumulo.core.security.Authorizations)4