use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class TestLazySimpleFast method testLazySimpleFast.
/**
 * Round-trips the given rows through the "fast" LazySimple writer/reader and through
 * {@code serde}, and fails if the two paths disagree.
 *
 * <p>Phases, in order:
 * <ol>
 *   <li>serialize every row with {@code LazySimpleSerializeWrite} (always all columns);</li>
 *   <li>read those bytes back with {@code LazySimpleDeserializeRead} and verify each field;</li>
 *   <li>read those bytes back with {@code serde.deserialize} and compare each field's writable
 *       object with the source row;</li>
 *   <li>serialize every row with {@code serde.serialize} and require the bytes to equal the
 *       phase-1 bytes;</li>
 *   <li>read the SerDe bytes back with {@code LazySimpleDeserializeRead} and verify each
 *       field.</li>
 * </ol>
 *
 * @param source not used by this method
 * @param rows one {@code Object[]} per row; every element is cast to {@code Writable}
 * @param serde SerDe used for the deserialize (phase 3) and serialize (phase 4) comparisons
 * @param rowOI object inspector handed to {@code serde.serialize}
 * @param serde_fewer not used by this method
 * @param writeRowOI consulted only when {@code doWriteFewerColumns} is set, for its field count
 * @param separator field separator passed to the fast writer and reader
 * @param serdeParams SerDe parameters passed to the fast writer and reader
 * @param serdeParams_fewer not used by this method
 * @param primitiveTypeInfos type of each column; its length is the column count
 * @param useIncludeColumns when true, a random per-column mask decides which fields are verified
 *        and which are skipped by the reader
 * @param doWriteFewerColumns when true, the reader is built with only the leading
 *        {@code writeRowOI} field count types and the remaining columns must read back as null
 * @param r randomness source for the include mask
 * @throws Throwable whatever the serialization or verification helpers throw
 */
private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
  int rowCount = rows.length;
  int columnCount = primitiveTypeInfos.length;

  // A null mask means "verify every column".
  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }

  int writeColumnCount = columnCount;
  PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
  }

  // Try to serialize
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
    lazySimpleSerializeWrite.set(output);
    for (int index = 0; index < columnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
  }

  // Try to deserialize
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];
    // Only the first getLength() bytes of the backing array belong to the row.
    verifyRowWithDeserializeRead(rows[i], bytesWritable.getBytes(), bytesWritable.getLength(), primitiveTypeInfos, writePrimitiveTypeInfos, writeColumnCount, columnsToInclude, separator, serdeParams);
  }

  // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];
    LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
    Object[] row = rows[i];
    for (int index = 0; index < columnCount; index++) {
      Writable writable = (Writable) row[index];
      LazyPrimitive<?, ?> lazyPrimitive = (LazyPrimitive<?, ?>) lazySimpleStruct.getField(index);
      Object object = (lazyPrimitive != null) ? lazyPrimitive.getWritableObject() : null;
      if (writable == null || object == null) {
        // Both sides must be null together.
        if (writable != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else if (!object.equals(writable)) {
        fail("SerDe deserialized value does not match");
      }
    }
  }

  // One Writable per row.
  byte[][] serdeBytes = new byte[rowCount][];
  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  Object[] serdeRow = new Object[columnCount];
  for (int i = 0; i < rowCount; i++) {
    // LazySimple seems to work better with an row object array instead of a Java object...
    System.arraycopy(rows[i], 0, serdeRow, 0, columnCount);
    Text serialized = (Text) serde.serialize(serdeRow, rowOI);
    byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
    byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
    if (!Arrays.equals(bytes1, bytes2)) {
      fail("SerializeWrite and SerDe serialization does not match");
    }
    serdeBytes[i] = copyBytes(serialized);
  }

  // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
  for (int i = 0; i < rowCount; i++) {
    byte[] bytes = serdeBytes[i];
    verifyRowWithDeserializeRead(rows[i], bytes, bytes.length, primitiveTypeInfos, writePrimitiveTypeInfos, writeColumnCount, columnsToInclude, separator, serdeParams);
  }
}

/**
 * Reads one serialized row with a fresh {@code LazySimpleDeserializeRead} and verifies every
 * column against the expected row.
 *
 * <p>Columns masked out by {@code columnsToInclude} are skipped; columns at or beyond
 * {@code writeColumnCount} must read back as null; all other columns must match the expected
 * writable. When the reader covers every column, the end of input must have been reached.
 *
 * @param row expected values, one per column, each cast to {@code Writable}
 * @param bytes buffer holding the serialized row starting at offset 0
 * @param length number of valid bytes in {@code bytes}
 * @param primitiveTypeInfos type of every column
 * @param writePrimitiveTypeInfos types the reader is constructed with
 * @param writeColumnCount number of leading columns the reader is expected to produce
 * @param columnsToInclude per-column verify/skip mask, or null to verify every column
 * @param separator field separator passed to the reader
 * @param serdeParams SerDe parameters passed to the reader
 * @throws Throwable declared as broadly as the calling test method
 */
private void verifyRowWithDeserializeRead(Object[] row, byte[] bytes, int length, PrimitiveTypeInfo[] primitiveTypeInfos, PrimitiveTypeInfo[] writePrimitiveTypeInfos, int writeColumnCount, boolean[] columnsToInclude, byte separator, LazySerDeParameters serdeParams) throws Throwable {
  int columnCount = primitiveTypeInfos.length;
  LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
  false, separator, serdeParams);
  lazySimpleDeserializeRead.set(bytes, 0, length);
  for (int index = 0; index < columnCount; index++) {
    if (columnsToInclude != null && !columnsToInclude[index]) {
      lazySimpleDeserializeRead.skipNextField();
    } else if (index >= writeColumnCount) {
      // Should come back a null.
      VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
    } else {
      Writable writable = (Writable) row[index];
      VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
    }
  }
  if (writeColumnCount == columnCount) {
    TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
  }
}
use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class ColumnMappings method setHiveColumnDescription.
/**
 * Records the Hive column name and type on each entry of {@code columnsMapping} and validates
 * the types of the entries that have type restrictions.
 *
 * <p>Two restrictions are enforced:
 * <ul>
 *   <li>an entry with no qualifier that is neither the row key nor the timestamp must be a MAP
 *       whose key type is PRIMITIVE;</li>
 *   <li>a timestamp entry must report category TIMESTAMP or LONG via
 *       {@code ColumnMapping.isCategory}.</li>
 * </ul>
 *
 * @param serdeName prefix used in error messages
 * @param columnNames Hive column names, positionally aligned with {@code columnsMapping}
 * @param columnTypes Hive column types, positionally aligned with {@code columnNames}
 * @throws SerDeException if the column count differs from the mapping count, or a restricted
 *         entry has an unsupported type
 */
void setHiveColumnDescription(String serdeName, List<String> columnNames, List<TypeInfo> columnTypes) throws SerDeException {
  final int hiveColumnCount = columnNames.size();
  if (hiveColumnCount != columnsMapping.length) {
    throw new SerDeException(serdeName + ": columns has " + hiveColumnCount + " elements while hbase.columns.mapping has " + columnsMapping.length + " elements" + " (counting the key if implicit)");
  }

  // where key extends LazyPrimitive<?, ?> and thus has type Category.PRIMITIVE
  for (int pos = 0; pos < hiveColumnCount; pos++) {
    final ColumnMapping mapping = columnsMapping[pos];
    mapping.columnName = columnNames.get(pos);
    final TypeInfo hiveType = columnTypes.get(pos);
    mapping.columnType = hiveType;

    // No qualifier, not the row key, not the timestamp: the entry stands for a whole family.
    final boolean wholeFamily = mapping.qualifierName == null && !mapping.hbaseRowKey && !mapping.hbaseTimestamp;
    if (wholeFamily) {
      final boolean primitiveKeyedMap =
          hiveType.getCategory() == ObjectInspector.Category.MAP
              && ((MapTypeInfo) hiveType).getMapKeyTypeInfo().getCategory() == ObjectInspector.Category.PRIMITIVE;
      if (!primitiveKeyedMap) {
        throw new SerDeException(serdeName + ": hbase column family '" + mapping.familyName + "' should be mapped to Map<? extends LazyPrimitive<?, ?>,?>, that is " + "the Key for the map should be of primitive type, but is mapped to " + hiveType.getTypeName());
      }
    }

    if (mapping.hbaseTimestamp) {
      final boolean timestampCompatible =
          mapping.isCategory(PrimitiveCategory.TIMESTAMP) || mapping.isCategory(PrimitiveCategory.LONG);
      if (!timestampCompatible) {
        throw new SerDeException(serdeName + ": timestamp columns should be of " + "timestamp or bigint type, but is mapped to " + hiveType.getTypeName());
      }
    }
  }
}
use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class LazyHBaseCellMap method getMapValueElement.
/**
 * Looks up the value stored under {@code key}, parsing the map first if that has not happened
 * yet.
 *
 * <p>Every cached key is a {@code LazyPrimitive}; it is converted to its writable object and
 * compared with {@code key} via {@code equals}. Entries whose key converts to null are ignored.
 *
 * @param key the key to search for, compared against each cached key's writable object
 * @return for the first matching entry, the result of {@code getObject()} when the value is a
 *         {@code LazyObject}, otherwise the stored value itself; {@code null} when no entry
 *         matches
 */
@Override
public Object getMapValueElement(Object key) {
  if (!getParsed()) {
    parse();
  }

  for (Map.Entry<Object, Object> cell : cachedMap.entrySet()) {
    // getWritableObject() turns the lazy key into the actual primitive writable.
    final Object writableKey = ((LazyPrimitive<?, ?>) cell.getKey()).getWritableObject();
    if (writableKey == null || !writableKey.equals(key)) {
      continue;
    }

    // Match found: unwrap lazy values, hand back anything else untouched.
    final Object stored = cell.getValue();
    if (stored instanceof LazyObject) {
      return ((LazyObject<?>) stored).getObject();
    }
    return stored;
  }

  return null;
}
use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class TestHBaseSerDe method deserializeAndSerialize.
/**
 * Deserializes {@code r} with {@code serDe}, checks each of the nine fields against
 * {@code expectedFieldsData}, then serializes the row again and compares the resulting Put's
 * string form with {@code p.toString()}.
 *
 * @param serDe SerDe under test; its object inspector must be a struct inspector with 9 fields
 * @param r HBase result to deserialize
 * @param p Put whose string form the re-serialized row must match
 * @param expectedFieldsData expected writable object per field; a null entry expects null data
 * @throws SerDeException if the SerDe fails to deserialize or serialize
 */
private void deserializeAndSerialize(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData) throws SerDeException {
  // Row structure
  final StructObjectInspector rowInspector = (StructObjectInspector) serDe.getObjectInspector();
  final List<? extends StructField> fields = rowInspector.getAllStructFieldRefs();
  assertEquals(9, fields.size());

  // Deserialize and compare field by field
  final Object row = serDe.deserialize(new ResultWritable(r));
  int position = 0;
  for (StructField field : fields) {
    final Object lazyValue = rowInspector.getStructFieldData(row, field);
    // Non-null field data is a LazyPrimitive; compare on its writable object.
    final Object actual = (lazyValue == null) ? null : ((LazyPrimitive<?, ?>) lazyValue).getWritableObject();
    assertEquals("Field " + position, expectedFieldsData[position], actual);
    position++;
  }

  // Serialize and compare against the expected Put
  assertEquals(PutWritable.class, serDe.getSerializedClass());
  final PutWritable written = (PutWritable) serDe.serialize(row, rowInspector);
  assertEquals("Serialized data", p.toString(), String.valueOf(written.getPut()));
}
use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class TestHBaseSerDe method deserializeAndSerializeHiveStructColumnFamily.
/**
 * Deserializes {@code r} and verifies every field, which must be either a {@code LazyPrimitive}
 * or a {@code LazyHBaseCellMap}, then checks that the row can be rendered as JSON and
 * serialized back to a non-null Put.
 *
 * <p>For map fields a single cursor walks {@code expectedQualifiers} across all maps; the
 * value found under qualifier {@code k} is compared, as a trimmed string, with
 * {@code expectedFieldsData[k + 1]}.
 *
 * @param serDe SerDe under test; its object inspector must be a struct inspector
 * @param r HBase result to deserialize
 * @param p not consulted by this method
 * @param expectedFieldsData expected values, indexed as described above
 * @param expectedMapSize expected size of the map found at field {@code i}, stored at
 *        index {@code i - 1}
 * @param expectedQualifiers map keys to look up, in encounter order
 * @param notPresentKey a key that must be absent from every map field
 * @throws SerDeException if the SerDe fails to deserialize or serialize
 * @throws IOException declared by the original signature
 */
private void deserializeAndSerializeHiveStructColumnFamily(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData, int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey) throws SerDeException, IOException {
  final StructObjectInspector inspector = (StructObjectInspector) serDe.getObjectInspector();
  final List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  final Object row = serDe.deserialize(new ResultWritable(r));

  // Cursor into expectedQualifiers; carried over from one map field to the next.
  int qualifierCursor = 0;
  for (int fieldIdx = 0; fieldIdx < fields.size(); fieldIdx++) {
    final Object fieldData = inspector.getStructFieldData(row, fields.get(fieldIdx));
    assertNotNull(fieldData);

    if (fieldData instanceof LazyPrimitive<?, ?>) {
      final LazyPrimitive<?, ?> primitive = (LazyPrimitive<?, ?>) fieldData;
      assertEquals(expectedFieldsData[fieldIdx], primitive.getWritableObject());
    } else if (fieldData instanceof LazyHBaseCellMap) {
      final LazyHBaseCellMap cellMap = (LazyHBaseCellMap) fieldData;
      for (int seen = 0; seen < cellMap.getMapSize(); seen++) {
        final Object expected = expectedFieldsData[qualifierCursor + 1];
        final Object qualifier = expectedQualifiers.get(qualifierCursor);
        final String actualText = cellMap.getMapValueElement(qualifier).toString().trim();
        assertEquals(expected, actualText);
        qualifierCursor++;
      }
      assertEquals(expectedMapSize[fieldIdx - 1], cellMap.getMapSize());
      // Make sure that the unwanted key is not present in the map
      assertNull(cellMap.getMapValueElement(notPresentKey));
    } else {
      fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
    }
  }

  // Result is discarded; the call is made only to exercise JSON rendering of the row.
  SerDeUtils.getJSONString(row, inspector);

  // Now serialize
  final Put put = ((PutWritable) serDe.serialize(row, inspector)).getPut();
  assertNotNull(put);
}
Aggregations