Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.
The class TestVectorSerDeRow, method deserializeAndVerify.
void deserializeAndVerify(Output output, DeserializeRead deserializeRead, VectorRandomRowSource source, Object[] expectedRow) throws HiveException, IOException {
  deserializeRead.set(output.getData(), 0, output.getLength());
  PrimitiveCategory[] primitiveCategories = source.primitiveCategories();
  for (int i = 0; i < primitiveCategories.length; i++) {
    Object expected = expectedRow[i];
    PrimitiveCategory primitiveCategory = primitiveCategories[i];
    PrimitiveTypeInfo primitiveTypeInfo = source.primitiveTypeInfos()[i];
    if (!deserializeRead.readNextField()) {
      throw new HiveException("Unexpected NULL when reading primitiveCategory " + primitiveCategory + " expected (" + expected.getClass().getName() + ", " + expected + ") deserializeRead " + deserializeRead.getClass().getName());
    }
    switch (primitiveCategory) {
    case BOOLEAN:
      {
        Boolean value = deserializeRead.currentBoolean;
        BooleanWritable expectedWritable = (BooleanWritable) expected;
        if (!value.equals(expectedWritable.get())) {
          TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case BYTE:
      {
        Byte value = deserializeRead.currentByte;
        ByteWritable expectedWritable = (ByteWritable) expected;
        if (!value.equals(expectedWritable.get())) {
          // Cast the unwrapped byte values, not the Writable object, when formatting as int.
          TestCase.fail("Byte field mismatch (expected " + (int) expectedWritable.get() + " found " + (int) value + ")");
        }
      }
      break;
    case SHORT:
      {
        Short value = deserializeRead.currentShort;
        ShortWritable expectedWritable = (ShortWritable) expected;
        if (!value.equals(expectedWritable.get())) {
          TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case INT:
      {
        Integer value = deserializeRead.currentInt;
        IntWritable expectedWritable = (IntWritable) expected;
        if (!value.equals(expectedWritable.get())) {
          TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case LONG:
      {
        Long value = deserializeRead.currentLong;
        LongWritable expectedWritable = (LongWritable) expected;
        if (!value.equals(expectedWritable.get())) {
          TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case DATE:
      {
        DateWritable value = deserializeRead.currentDateWritable;
        DateWritable expectedWritable = (DateWritable) expected;
        if (!value.equals(expectedWritable)) {
          TestCase.fail("Date field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case FLOAT:
      {
        Float value = deserializeRead.currentFloat;
        FloatWritable expectedWritable = (FloatWritable) expected;
        if (!value.equals(expectedWritable.get())) {
          TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case DOUBLE:
      {
        Double value = deserializeRead.currentDouble;
        DoubleWritable expectedWritable = (DoubleWritable) expected;
        if (!value.equals(expectedWritable.get())) {
          TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case STRING:
    case CHAR:
    case VARCHAR:
    case BINARY:
      {
        byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
        Text text = new Text(stringBytes);
        String string = text.toString();
        switch (primitiveCategory) {
        case STRING:
          {
            Text expectedWritable = (Text) expected;
            if (!string.equals(expectedWritable.toString())) {
              TestCase.fail("String field mismatch (expected '" + expectedWritable.toString() + "' found '" + string + "')");
            }
          }
          break;
        case CHAR:
          {
            HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
            HiveCharWritable expectedWritable = (HiveCharWritable) expected;
            if (!hiveChar.equals(expectedWritable.getHiveChar())) {
              TestCase.fail("Char field mismatch (expected '" + expectedWritable.getHiveChar() + "' found '" + hiveChar + "')");
            }
          }
          break;
        case VARCHAR:
          {
            HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
            HiveVarcharWritable expectedWritable = (HiveVarcharWritable) expected;
            if (!hiveVarchar.equals(expectedWritable.getHiveVarchar())) {
              TestCase.fail("Varchar field mismatch (expected '" + expectedWritable.getHiveVarchar() + "' found '" + hiveVarchar + "')");
            }
          }
          break;
        case BINARY:
          {
            BytesWritable expectedWritable = (BytesWritable) expected;
            if (stringBytes.length != expectedWritable.getLength()) {
              // Arrays.toString shows the contents; a bare byte[] would print only its identity hash.
              TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + Arrays.toString(stringBytes) + ")");
            }
            byte[] expectedBytes = expectedWritable.getBytes();
            for (int b = 0; b < stringBytes.length; b++) {
              if (stringBytes[b] != expectedBytes[b]) {
                TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + Arrays.toString(stringBytes) + ")");
              }
            }
          }
          break;
        default:
          throw new HiveException("Unexpected primitive category " + primitiveCategory);
        }
      }
      break;
    case DECIMAL:
      {
        HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
        if (value == null) {
          TestCase.fail("Decimal field evaluated to NULL");
        }
        HiveDecimalWritable expectedWritable = (HiveDecimalWritable) expected;
        if (!value.equals(expectedWritable.getHiveDecimal())) {
          DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
          int precision = decimalTypeInfo.getPrecision();
          int scale = decimalTypeInfo.getScale();
          TestCase.fail("Decimal field mismatch (expected " + expectedWritable.getHiveDecimal() + " found " + value + ") precision " + precision + ", scale " + scale);
        }
      }
      break;
    case TIMESTAMP:
      {
        Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
        TimestampWritable expectedWritable = (TimestampWritable) expected;
        if (!value.equals(expectedWritable.getTimestamp())) {
          TestCase.fail("Timestamp field mismatch (expected " + expectedWritable.getTimestamp() + " found " + value + ")");
        }
      }
      break;
    case INTERVAL_YEAR_MONTH:
      {
        HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
        HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) expected;
        HiveIntervalYearMonth expectedValue = expectedWritable.getHiveIntervalYearMonth();
        if (!value.equals(expectedValue)) {
          TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expectedValue + " found " + value + ")");
        }
      }
      break;
    case INTERVAL_DAY_TIME:
      {
        HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
        HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) expected;
        HiveIntervalDayTime expectedValue = expectedWritable.getHiveIntervalDayTime();
        if (!value.equals(expectedValue)) {
          TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expectedValue + " found " + value + ")");
        }
      }
      break;
    default:
      throw new HiveException("Unexpected primitive category " + primitiveCategory);
    }
  }
  TestCase.assertTrue(deserializeRead.isEndOfInputReached());
}
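For orientation, the read pattern this verifier exercises is: position the reader with set(), call readNextField() once per declared field (a false return means the field was NULL), pull the value from the matching current* member, and finally check isEndOfInputReached(). Below is a minimal sketch, not taken from the Hive tests; the two-column (int, string) schema and the bytes variable holding one serialized row are hypothetical, and imports are omitted in the style of the excerpts above.

  PrimitiveTypeInfo[] typeInfos = new PrimitiveTypeInfo[] {
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo };
  DeserializeRead reader = new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
  reader.set(bytes, 0, bytes.length);  // 'bytes' holds one serialized row (hypothetical)
  if (reader.readNextField()) {        // false would mean a NULL int
    int first = reader.currentInt;
  }
  if (reader.readNextField()) {        // strings come back as a byte range, not a copy
    String second = new String(reader.currentBytes, reader.currentBytesStart,
        reader.currentBytesLength, StandardCharsets.UTF_8);
  }
  TestCase.assertTrue(reader.isEndOfInputReached());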
Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.
The class TestVectorSerDeRow, method testVectorSerializeRow.
void testVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
  String[] emptyScratchTypeNames = new String[0];
  VectorRandomRowSource source = new VectorRandomRowSource();
  source.init(r);
  VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
  batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
  VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
  VectorAssignRow vectorAssignRow = new VectorAssignRow();
  vectorAssignRow.init(source.typeNames());
  int fieldCount = source.typeNames().size();
  DeserializeRead deserializeRead;
  SerializeWrite serializeWrite;
  switch (serializationType) {
  case BINARY_SORTABLE:
    deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
    serializeWrite = new BinarySortableSerializeWrite(fieldCount);
    break;
  case LAZY_BINARY:
    deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
    serializeWrite = new LazyBinarySerializeWrite(fieldCount);
    break;
  case LAZY_SIMPLE:
    {
      StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
      LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector);
      byte separator = (byte) '\t';
      deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false, separator, lazySerDeParams);
      serializeWrite = new LazySimpleSerializeWrite(fieldCount, separator, lazySerDeParams);
    }
    break;
  default:
    throw new Error("Unknown serialization type " + serializationType);
  }
  VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite);
  vectorSerializeRow.init(source.typeNames());
  Object[][] randomRows = source.randomRows(100000);
  int firstRandomRowIndex = 0;
  for (int i = 0; i < randomRows.length; i++) {
    Object[] row = randomRows[i];
    vectorAssignRow.assignRow(batch, batch.size, row);
    batch.size++;
    if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
      serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
      firstRandomRowIndex = i + 1;
      batch.reset();
    }
  }
  if (batch.size > 0) {
    serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
  }
}
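Each branch of the switch pairs a DeserializeRead with the SerializeWrite for the same encoding; mixing formats would make deserializeAndVerify read garbage. A minimal hedged round trip for the BINARY_SORTABLE pairing, using a hypothetical one-column bigint schema and assuming the SerializeWrite interface's writeLong, which the fieldwise writers implement:

  Output out = new Output();
  BinarySortableSerializeWrite writer = new BinarySortableSerializeWrite(/* fieldCount */ 1);
  writer.set(out);        // bind the writer to the output buffer
  writer.writeLong(42L);  // one write per declared field
  BinarySortableDeserializeRead reader = new BinarySortableDeserializeRead(
      new PrimitiveTypeInfo[] { TypeInfoFactory.longTypeInfo }, /* useExternalBuffer */ false);
  reader.set(out.getData(), 0, out.getLength());
  TestCase.assertTrue(reader.readNextField());
  TestCase.assertEquals(42L, reader.currentLong);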
Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.
The class TestVectorSerDeRow, method serializeBatch.
void serializeBatch(VectorizedRowBatch batch, VectorSerializeRow vectorSerializeRow, DeserializeRead deserializeRead, VectorRandomRowSource source, Object[][] randomRows, int firstRandomRowIndex) throws HiveException, IOException {
  Output output = new Output();
  for (int i = 0; i < batch.size; i++) {
    output.reset();
    vectorSerializeRow.setOutput(output);
    vectorSerializeRow.serializeWrite(batch, i);
    Object[] expectedRow = randomRows[firstRandomRowIndex + i];
    byte[] bytes = output.getData();
    int length = output.getLength();
    // Debugging aid: view the serialized row as characters (not otherwise used by the test).
    char[] chars = new char[length];
    for (int c = 0; c < chars.length; c++) {
      chars[c] = (char) (bytes[c] & 0xFF);
    }
    deserializeAndVerify(output, deserializeRead, source, expectedRow);
  }
}
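One detail worth keeping in mind when reading serializeBatch: the method reuses a single Output across rows, and Output.getData() returns the live backing array, which is usually longer than the valid region, so every consumer must pair it with getLength(), exactly as deserializeRead.set(...) does in deserializeAndVerify. A small hedged sketch of copying exactly one row out of the reusable buffer:

  // Copy only the valid region; reset() rewinds the length but does not zero the buffer,
  // so the backing array may still carry stale trailing bytes from earlier, longer rows.
  byte[] rowBytes = Arrays.copyOfRange(output.getData(), 0, output.getLength());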
Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.
The class TestLazyBinaryFast, method testLazyBinaryFast.
private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
  int rowCount = rows.length;
  int columnCount = primitiveTypeInfos.length;
  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }
  int writeColumnCount = columnCount;
  PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
  }
  LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(writeColumnCount);
  // Try to serialize.
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    lazyBinarySerializeWrite.set(output);
    for (int index = 0; index < writeColumnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
  }
  // Try to deserialize.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last column.
    LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false);
    BytesWritable bytesWritable = serializeWriteBytes[i];
    lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazyBinaryDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as null.
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
    }
  }
  // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];
    LazyBinaryStruct lazyBinaryStruct;
    if (doWriteFewerColumns) {
      lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
    } else {
      lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
    }
    Object[] row = rows[i];
    for (int index = 0; index < writeColumnCount; index++) {
      PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
      Writable writable = (Writable) row[index];
      Object object = lazyBinaryStruct.getField(index);
      if (writable == null || object == null) {
        if (writable != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else {
        if (!object.equals(writable)) {
          fail("SerDe deserialized value does not match");
        }
      }
    }
  }
  // One Writable per row.
  BytesWritable[] serdeBytes = new BytesWritable[rowCount];
  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  Object[] serdeRow = new Object[writeColumnCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // LazyBinary seems to work better with a row object array instead of a Java object...
    for (int index = 0; index < writeColumnCount; index++) {
      serdeRow[index] = row[index];
    }
    BytesWritable serialized;
    if (doWriteFewerColumns) {
      serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
    } else {
      serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
    }
    BytesWritable bytesWritable = new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
    byte[] bytes1 = bytesWritable.getBytes();
    BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
    byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
    if (bytes1.length != bytes2.length) {
      fail("SerializeWrite length " + bytes2.length + " and SerDe serialization length " + bytes1.length + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
    }
    if (!Arrays.equals(bytes1, bytes2)) {
      fail("SerializeWrite and SerDe serialization do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
    }
    serdeBytes[i] = bytesWritable;
  }
  // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // When doWriteFewerColumns, try to read more fields than exist in the buffer.
    LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false);
    BytesWritable bytesWritable = serdeBytes[i];
    lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazyBinaryDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as null.
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
    }
  }
}
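The doWriteFewerColumns pass relies on a specific DeserializeRead behavior: when the reader's schema declares more trailing columns than the buffer contains, readNextField() returns false for the missing fields, which VerifyFast treats as NULL. A hedged sketch of that behavior with a hypothetical two-column reader over a one-column buffer, assuming the SerializeWrite interface's writeInt:

  Output out = new Output();
  LazyBinarySerializeWrite writer = new LazyBinarySerializeWrite(/* fieldCount */ 1);
  writer.set(out);
  writer.writeInt(7);  // buffer now holds a single int column
  LazyBinaryDeserializeRead reader = new LazyBinaryDeserializeRead(
      new PrimitiveTypeInfo[] { TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo },
      /* useExternalBuffer */ false);
  reader.set(out.getData(), 0, out.getLength());
  TestCase.assertTrue(reader.readNextField());   // the written int is present
  TestCase.assertFalse(reader.readNextField());  // past the end: reads as NULL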
Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.
The class TestLazySimpleFast, method testLazySimpleFast.
private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
  int rowCount = rows.length;
  int columnCount = primitiveTypeInfos.length;
  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }
  int writeColumnCount = columnCount;
  PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
  }
  // Try to serialize.
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
    lazySimpleSerializeWrite.set(output);
    for (int index = 0; index < columnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
  }
  // Try to deserialize.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false, separator, serdeParams);
    BytesWritable bytesWritable = serializeWriteBytes[i];
    byte[] bytes = bytesWritable.getBytes();
    int length = bytesWritable.getLength();
    lazySimpleDeserializeRead.set(bytes, 0, length);
    // Debugging aid: view the serialized row as characters (not otherwise used by the test).
    char[] chars = new char[length];
    for (int c = 0; c < chars.length; c++) {
      chars[c] = (char) (bytes[c] & 0xFF);
    }
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazySimpleDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as null.
        VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
    }
  }
  // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];
    LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
    Object[] row = rows[i];
    for (int index = 0; index < columnCount; index++) {
      PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
      Writable writable = (Writable) row[index];
      LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
      Object object;
      if (lazyPrimitive != null) {
        object = lazyPrimitive.getWritableObject();
      } else {
        object = null;
      }
      if (writable == null || object == null) {
        if (writable != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else {
        if (!object.equals(writable)) {
          fail("SerDe deserialized value does not match");
        }
      }
    }
  }
  // One Writable per row.
  byte[][] serdeBytes = new byte[rowCount][];
  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  Object[] serdeRow = new Object[columnCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // LazySimple seems to work better with a row object array instead of a Java object...
    for (int index = 0; index < columnCount; index++) {
      serdeRow[index] = row[index];
    }
    Text serialized = (Text) serde.serialize(serdeRow, rowOI);
    byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
    byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
    if (!Arrays.equals(bytes1, bytes2)) {
      fail("SerializeWrite and SerDe serialization do not match");
    }
    serdeBytes[i] = copyBytes(serialized);
  }
  // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false, separator, serdeParams);
    byte[] bytes = serdeBytes[i];
    lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazySimpleDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as null.
        VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
    }
  }
}
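Unlike LazyBinary, the LazySimple encoding is delimited text, which is why this test can meaningfully view the serialized bytes as a char[]. A hedged sketch of what those bytes look like, using a hypothetical two-column schema; it assumes the LazySerDeParameters(Configuration, Properties, String) constructor, the serdeConstants column properties, the SerializeWrite writeInt/writeString(byte[]) methods, and default escaping:

  Properties tbl = new Properties();
  tbl.setProperty(serdeConstants.LIST_COLUMNS, "a,b");
  tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
  LazySerDeParameters params = new LazySerDeParameters(new Configuration(), tbl, LazySimpleSerDe.class.getName());
  Output out = new Output();
  LazySimpleSerializeWrite writer = new LazySimpleSerializeWrite(2, (byte) '\t', params);
  writer.set(out);
  writer.writeInt(123);
  writer.writeString("foo".getBytes(StandardCharsets.UTF_8));
  // The buffer now holds the human-readable text "123\tfoo".
  String text = new String(out.getData(), 0, out.getLength(), StandardCharsets.UTF_8);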