
Example 1 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class TestLazySimpleFast method testLazySimpleFast.

private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
        lazySimpleSerializeWrite.set(output);
        for (int index = 0; index < columnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false, separator, serdeParams);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        byte[] bytes = bytesWritable.getBytes();
        int length = bytesWritable.getLength();
        lazySimpleDeserializeRead.set(bytes, 0, length);
        // Build a char view of the serialized bytes (handy when debugging a failed row).
        char[] chars = new char[length];
        for (int c = 0; c < chars.length; c++) {
            chars[c] = (char) (bytes[c] & 0xFF);
        }
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
        Object[] row = rows[i];
        for (int index = 0; index < columnCount; index++) {
            PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
            Writable writable = (Writable) row[index];
            LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
            Object object;
            if (lazyPrimitive != null) {
                object = lazyPrimitive.getWritableObject();
            } else {
                object = null;
            }
            if (writable == null || object == null) {
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    byte[][] serdeBytes = new byte[rowCount][];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[columnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazySimple seems to work better with a row object array instead of a Java object...
        for (int index = 0; index < columnCount; index++) {
            serdeRow[index] = row[index];
        }
        Text serialized = (Text) serde.serialize(serdeRow, rowOI);
        byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
        byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match");
        }
        serdeBytes[i] = copyBytes(serialized);
    }
    // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false, separator, serdeParams);
        byte[] bytes = serdeBytes[i];
        lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) Text(org.apache.hadoop.io.Text) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output)
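
The test above round-trips whole randomly generated rows. Stripped down to a single string column, the same SerializeWrite/DeserializeRead pattern looks roughly like the sketch below. The helper method name, the hard-coded tab separator, and the assumption that a LazySerDeParameters instance (params) and the VerifyFast test helper are available as in the test class are illustrative, not taken verbatim from the Hive sources.

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;

// Illustrative sketch only: round-trip one string value through the "fast" serde path.
private static void roundTripOneString(LazySerDeParameters params) throws Throwable {
    PrimitiveTypeInfo stringType = TypeInfoFactory.stringTypeInfo;
    byte separator = (byte) '\t';
    // Serialize with LazySimpleSerializeWrite, as the test above does per row.
    Output output = new Output();
    LazySimpleSerializeWrite writer = new LazySimpleSerializeWrite(1, separator, params);
    writer.set(output);
    VerifyFast.serializeWrite(writer, stringType, new Text("hello"));
    BytesWritable bytes = new BytesWritable();
    bytes.set(output.getData(), 0, output.getLength());
    // Deserialize with LazySimpleDeserializeRead and verify the value round-trips.
    LazySimpleDeserializeRead reader = new LazySimpleDeserializeRead(
        new PrimitiveTypeInfo[] { stringType }, /* useExternalBuffer */ false, separator, params);
    reader.set(bytes.getBytes(), 0, bytes.getLength());
    VerifyFast.verifyDeserializeRead(reader, stringType, new Text("hello"));
}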

Example 2 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class ColumnMappings method setHiveColumnDescription.

void setHiveColumnDescription(String serdeName, List<String> columnNames, List<TypeInfo> columnTypes) throws SerDeException {
    if (columnsMapping.length != columnNames.size()) {
        throw new SerDeException(serdeName + ": columns has " + columnNames.size() + " elements while hbase.columns.mapping has " + columnsMapping.length + " elements" + " (counting the key if implicit)");
    }
    // Check that the mapping schema is right: a bare "column-family:" must map to a
    // Map whose key extends LazyPrimitive<?, ?> and thus has type Category.PRIMITIVE.
    for (int i = 0; i < columnNames.size(); i++) {
        ColumnMapping colMap = columnsMapping[i];
        colMap.columnName = columnNames.get(i);
        colMap.columnType = columnTypes.get(i);
        if (colMap.qualifierName == null && !colMap.hbaseRowKey && !colMap.hbaseTimestamp) {
            TypeInfo typeInfo = columnTypes.get(i);
            if ((typeInfo.getCategory() != ObjectInspector.Category.MAP) || (((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getCategory() != ObjectInspector.Category.PRIMITIVE)) {
                throw new SerDeException(serdeName + ": hbase column family '" + colMap.familyName + "' should be mapped to Map<? extends LazyPrimitive<?, ?>,?>, that is " + "the Key for the map should be of primitive type, but is mapped to " + typeInfo.getTypeName());
            }
        }
        if (colMap.hbaseTimestamp) {
            TypeInfo typeInfo = columnTypes.get(i);
            if (!colMap.isCategory(PrimitiveCategory.TIMESTAMP) && !colMap.isCategory(PrimitiveCategory.LONG)) {
                throw new SerDeException(serdeName + ": timestamp columns should be of " + "timestamp or bigint type, but is mapped to " + typeInfo.getTypeName());
            }
        }
    }
}
Also used : MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
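
The type check above can be tried in isolation: a column family mapped without a qualifier is only accepted when its Hive type is a MAP whose key category is PRIMITIVE. Below is a standalone sketch of that rule built with TypeInfoFactory; the variable names are illustrative and the snippet is not part of ColumnMappings.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// map<string,int>: the key is primitive, so a bare "cf:" mapping is accepted.
TypeInfo valid = TypeInfoFactory.getMapTypeInfo(
        TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo);
// map<array<string>,int>: the key is not primitive, so setHiveColumnDescription throws.
TypeInfo invalid = TypeInfoFactory.getMapTypeInfo(
        TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo),
        TypeInfoFactory.intTypeInfo);

boolean validPasses = valid.getCategory() == ObjectInspector.Category.MAP
        && ((MapTypeInfo) valid).getMapKeyTypeInfo().getCategory()
                == ObjectInspector.Category.PRIMITIVE;     // true
boolean invalidPasses = invalid.getCategory() == ObjectInspector.Category.MAP
        && ((MapTypeInfo) invalid).getMapKeyTypeInfo().getCategory()
                == ObjectInspector.Category.PRIMITIVE;     // false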

Example 3 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class LazyHBaseCellMap method getMapValueElement.

/**
   * Get the value in the map for the given key.
   *
   * @param key the key to look up in the cached map
   * @return the corresponding map value, or null if the key is not present
   */
@Override
public Object getMapValueElement(Object key) {
    if (!getParsed()) {
        parse();
    }
    for (Map.Entry<Object, Object> entry : cachedMap.entrySet()) {
        LazyPrimitive<?, ?> lazyKeyI = (LazyPrimitive<?, ?>) entry.getKey();
        // getWritableObject() will convert LazyPrimitive to actual primitive
        // writable objects.
        Object keyI = lazyKeyI.getWritableObject();
        if (keyI == null) {
            continue;
        }
        if (keyI.equals(key)) {
            // Got a match: if the value is a LazyObject, unwrap it; else return it as is.
            Object _value = entry.getValue();
            if (_value instanceof LazyObject) {
                LazyObject<?> v = (LazyObject<?>) entry.getValue();
                return v == null ? v : v.getObject();
            } else {
                return _value;
            }
        }
    }
    return null;
}
Also used : LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) NavigableMap(java.util.NavigableMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive)
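
The keys of the cached map are LazyPrimitive instances, so the lookup compares against their Writable form; a string-typed map key can therefore be matched with a Text object. A small usage sketch, assuming cellMap is a LazyHBaseCellMap struct field from a row deserialized by HBaseSerDe (as in the next two examples); the qualifier name is made up.

import org.apache.hadoop.io.Text;

// `cellMap` is assumed to be a LazyHBaseCellMap obtained from a deserialized row.
Object value = cellMap.getMapValueElement(new Text("someQualifier"));
if (value != null) {
    // Example 5 below compares such values via toString(), which yields the cell's text form.
    System.out.println(value.toString());
}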

Example 4 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class TestHBaseSerDe method deserializeAndSerialize.

private void deserializeAndSerialize(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData) throws SerDeException {
    // Get the row structure
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals(9, fieldRefs.size());
    // Deserialize
    Object row = serDe.deserialize(new ResultWritable(r));
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
        if (fieldData != null) {
            fieldData = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
        }
        assertEquals("Field " + i, expectedFieldsData[i], fieldData);
    }
    // Serialize
    assertEquals(PutWritable.class, serDe.getSerializedClass());
    PutWritable serializedPut = (PutWritable) serDe.serialize(row, oi);
    assertEquals("Serialized data", p.toString(), String.valueOf(serializedPut.getPut()));
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
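
Condensed, the round trip asserted above is: deserialize a Result into a lazy row, read fields back through the StructObjectInspector, then serialize the row into a Put. The sketch below assumes serDe is an initialized HBaseSerDe and result is an HBase Result for one row, both prepared elsewhere as in the surrounding test class; it is not the test itself.

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

// Deserialize one HBase row into Hive's lazy row representation.
Object row = serDe.deserialize(new ResultWritable(result));
StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
// Read the first field back as its primitive Writable, as the assertions above do.
StructField firstField = oi.getAllStructFieldRefs().get(0);
Object fieldData = oi.getStructFieldData(row, firstField);
Object writable = (fieldData == null)
        ? null
        : ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
// Serialize the same row back into an HBase Put.
Put put = ((PutWritable) serDe.serialize(row, oi)).getPut();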

Example 5 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class TestHBaseSerDe method deserializeAndSerializeHiveStructColumnFamily.

private void deserializeAndSerializeHiveStructColumnFamily(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData, int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey) throws SerDeException, IOException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
    Object row = serDe.deserialize(new ResultWritable(r));
    int k = 0;
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
        assertNotNull(fieldData);
        if (fieldData instanceof LazyPrimitive<?, ?>) {
            assertEquals(expectedFieldsData[i], ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
        } else if (fieldData instanceof LazyHBaseCellMap) {
            for (int j = 0; j < ((LazyHBaseCellMap) fieldData).getMapSize(); j++) {
                assertEquals(expectedFieldsData[k + 1], ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedQualifiers.get(k)).toString().trim());
                k++;
            }
            assertEquals(expectedMapSize[i - 1], ((LazyHBaseCellMap) fieldData).getMapSize());
            // Make sure that the unwanted key is not present in the map
            assertNull(((LazyHBaseCellMap) fieldData).getMapValueElement(notPresentKey));
        } else {
            fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
        }
    }
    SerDeUtils.getJSONString(row, soi);
    // Now serialize
    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
    assertNotNull(put);
}
Also used : LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) Put(org.apache.hadoop.hbase.client.Put) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

LazyPrimitive (org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) 12
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 9
Text (org.apache.hadoop.io.Text) 8
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 5
BooleanWritable (org.apache.hadoop.io.BooleanWritable) 5
BytesWritable (org.apache.hadoop.io.BytesWritable) 5
FloatWritable (org.apache.hadoop.io.FloatWritable) 5
IntWritable (org.apache.hadoop.io.IntWritable) 5
LongWritable (org.apache.hadoop.io.LongWritable) 5
Put (org.apache.hadoop.hbase.client.Put) 4
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable) 4
LazyMap (org.apache.hadoop.hive.serde2.lazy.LazyMap) 4
LazyObject (org.apache.hadoop.hive.serde2.lazy.LazyObject) 4
LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) 4
Writable (org.apache.hadoop.io.Writable) 4
ArrayList (java.util.ArrayList) 3
Map (java.util.Map) 3
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable) 3
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 3
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable) 3