use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class TestLazyBinarySerDe method testShorterSchemaDeserialization1.
/**
* Test shorter schema deserialization where a bigger struct is serialized and
* it is then deserialized with a smaller struct. Here the serialized struct
* has 9 fields and we deserialized to a struct of 8 fields.
*/
private void testShorterSchemaDeserialization1(Random r) throws Throwable {
StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
serde1.getObjectInspector();
StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassSmaller.class, ObjectInspectorOptions.JAVA);
String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
ObjectInspector serdeOI2 = serde2.getObjectInspector();
int num = 100;
for (int itest = 0; itest < num; itest++) {
MyTestClass t = new MyTestClass();
ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
t.randomFill(r, extraTypeInfo);
BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
Object output = serde2.deserialize(bw);
if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
System.out.println("structs = " + SerDeUtils.getJSONString(t, rowOI1));
System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
assertEquals(t, output);
}
}
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class TestLazySimpleFast method testLazySimpleFast.
private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
int rowCount = rows.length;
int columnCount = primitiveTypeInfos.length;
boolean[] columnsToInclude = null;
if (useIncludeColumns) {
columnsToInclude = new boolean[columnCount];
for (int i = 0; i < columnCount; i++) {
columnsToInclude[i] = r.nextBoolean();
}
}
int writeColumnCount = columnCount;
PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
if (doWriteFewerColumns) {
writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
}
// Try to serialize
BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
Output output = new Output();
LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
lazySimpleSerializeWrite.set(output);
for (int index = 0; index < columnCount; index++) {
Writable writable = (Writable) row[index];
VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
}
BytesWritable bytesWritable = new BytesWritable();
bytesWritable.set(output.getData(), 0, output.getLength());
serializeWriteBytes[i] = bytesWritable;
}
// Try to deserialize
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
false, separator, serdeParams);
BytesWritable bytesWritable = serializeWriteBytes[i];
byte[] bytes = bytesWritable.getBytes();
int length = bytesWritable.getLength();
lazySimpleDeserializeRead.set(bytes, 0, length);
char[] chars = new char[length];
for (int c = 0; c < chars.length; c++) {
chars[c] = (char) (bytes[c] & 0xFF);
}
for (int index = 0; index < columnCount; index++) {
if (useIncludeColumns && !columnsToInclude[index]) {
lazySimpleDeserializeRead.skipNextField();
} else if (index >= writeColumnCount) {
// Should come back a null.
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
} else {
Writable writable = (Writable) row[index];
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
}
}
if (writeColumnCount == columnCount) {
TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
}
}
// Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
for (int i = 0; i < rowCount; i++) {
BytesWritable bytesWritable = serializeWriteBytes[i];
LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
Object[] row = rows[i];
for (int index = 0; index < columnCount; index++) {
PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
Writable writable = (Writable) row[index];
LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
Object object;
if (lazyPrimitive != null) {
object = lazyPrimitive.getWritableObject();
} else {
object = null;
}
if (writable == null || object == null) {
if (writable != null || object != null) {
fail("SerDe deserialized NULL column mismatch");
}
} else {
if (!object.equals(writable)) {
fail("SerDe deserialized value does not match");
}
}
}
}
// One Writable per row.
byte[][] serdeBytes = new byte[rowCount][];
// Serialize using the SerDe, then below deserialize using DeserializeRead.
Object[] serdeRow = new Object[columnCount];
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
// LazySimple seems to work better with an row object array instead of a Java object...
for (int index = 0; index < columnCount; index++) {
serdeRow[index] = row[index];
}
Text serialized = (Text) serde.serialize(serdeRow, rowOI);
byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
if (!Arrays.equals(bytes1, bytes2)) {
fail("SerializeWrite and SerDe serialization does not match");
}
serdeBytes[i] = copyBytes(serialized);
}
// Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
false, separator, serdeParams);
byte[] bytes = serdeBytes[i];
lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
for (int index = 0; index < columnCount; index++) {
if (useIncludeColumns && !columnsToInclude[index]) {
lazySimpleDeserializeRead.skipNextField();
} else if (index >= writeColumnCount) {
// Should come back a null.
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
} else {
Writable writable = (Writable) row[index];
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
}
}
if (writeColumnCount == columnCount) {
TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
}
}
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class TestLazySimpleFast method testLazySimpleFastCase.
public void testLazySimpleFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
SerdeRandomRowSource source = new SerdeRandomRowSource();
source.init(r);
int rowCount = 1000;
Object[][] rows = source.randomRows(rowCount);
if (doNonRandomFill) {
MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
}
StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
int columnCount = primitiveTypeInfos.length;
int writeColumnCount = columnCount;
StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
boolean doWriteFewerColumns = r.nextBoolean();
if (doWriteFewerColumns) {
writeColumnCount = 1 + r.nextInt(columnCount);
if (writeColumnCount == columnCount) {
doWriteFewerColumns = false;
} else {
writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
}
}
String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
LazySimpleSerDe serde = getSerDe(fieldNames, fieldTypes);
LazySerDeParameters serdeParams = getSerDeParams(fieldNames, fieldTypes);
LazySimpleSerDe serde_fewer = null;
LazySerDeParameters serdeParams_fewer = null;
if (doWriteFewerColumns) {
String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
serde_fewer = getSerDe(fieldNames, fieldTypes);
serdeParams_fewer = getSerDeParams(partialFieldNames, partialFieldTypes);
}
byte separator = (byte) '\t';
testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, /* useIncludeColumns */
false, /* doWriteFewerColumns */
false, r);
testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, /* useIncludeColumns */
true, /* doWriteFewerColumns */
false, r);
if (doWriteFewerColumns) {
testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, /* useIncludeColumns */
false, /* doWriteFewerColumns */
true, r);
testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, /* useIncludeColumns */
true, /* doWriteFewerColumns */
true, r);
}
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class TestLazySimpleSerDe method testSerDeParameters.
/**
* Tests the deprecated usage of SerDeParameters.
*
*/
@Test
@SuppressWarnings("deprecation")
public void testSerDeParameters() throws SerDeException, IOException {
// Setup
LazySimpleSerDe serDe = new LazySimpleSerDe();
Configuration conf = new Configuration();
MyTestClass row = new MyTestClass();
ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
row.randomFill(new Random(1234), extraTypeInfo);
StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
Properties schema = new Properties();
schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
SerDeUtils.initializeSerDe(serDe, conf, schema, null);
SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, schema, "testSerdeName");
// Test
LazyStruct data = (LazyStruct) serializeAndDeserialize(row, rowOI, serDe, serdeParams);
assertEquals((boolean) row.myBool, ((LazyBoolean) data.getField(0)).getWritableObject().get());
assertEquals((int) row.myInt, ((LazyInteger) data.getField(3)).getWritableObject().get());
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class TestLazySimpleSerDe method serializeAndDeserialize.
private Object serializeAndDeserialize(Object row, StructObjectInspector rowOI, LazySimpleSerDe serde, LazySerDeParameters serdeParams) throws IOException, SerDeException {
ByteStream.Output serializeStream = new ByteStream.Output();
LazySimpleSerDe.serialize(serializeStream, row, rowOI, serdeParams.getSeparators(), 0, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams.getNeedsEscape());
Text t = new Text(serializeStream.toByteArray());
return serde.deserialize(t);
}
Aggregations