Use of org.apache.hadoop.hive.serde2.fast.SerializeWrite in project hive by apache.
The class TestVectorSerDeRow, method serializeRow.
private Output serializeRow(Object[] row, VectorRandomRowSource source,
    SerializeWrite serializeWrite) throws HiveException, IOException {
  Output output = new Output();
  serializeWrite.set(output);
  PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
  for (int i = 0; i < primitiveTypeInfos.length; i++) {
    Object object = row[i];
    PrimitiveCategory primitiveCategory = primitiveTypeInfos[i].getPrimitiveCategory();
    switch (primitiveCategory) {
    case BOOLEAN:
      {
        BooleanWritable expectedWritable = (BooleanWritable) object;
        boolean value = expectedWritable.get();
        serializeWrite.writeBoolean(value);
      }
      break;
    case BYTE:
      {
        ByteWritable expectedWritable = (ByteWritable) object;
        byte value = expectedWritable.get();
        serializeWrite.writeByte(value);
      }
      break;
    case SHORT:
      {
        ShortWritable expectedWritable = (ShortWritable) object;
        short value = expectedWritable.get();
        serializeWrite.writeShort(value);
      }
      break;
    case INT:
      {
        IntWritable expectedWritable = (IntWritable) object;
        int value = expectedWritable.get();
        serializeWrite.writeInt(value);
      }
      break;
    case LONG:
      {
        LongWritable expectedWritable = (LongWritable) object;
        long value = expectedWritable.get();
        serializeWrite.writeLong(value);
      }
      break;
    case DATE:
      {
        DateWritable expectedWritable = (DateWritable) object;
        Date value = expectedWritable.get();
        serializeWrite.writeDate(value);
      }
      break;
    case FLOAT:
      {
        FloatWritable expectedWritable = (FloatWritable) object;
        float value = expectedWritable.get();
        serializeWrite.writeFloat(value);
      }
      break;
    case DOUBLE:
      {
        DoubleWritable expectedWritable = (DoubleWritable) object;
        double value = expectedWritable.get();
        serializeWrite.writeDouble(value);
      }
      break;
    case STRING:
      {
        Text text = (Text) object;
        serializeWrite.writeString(text.getBytes(), 0, text.getLength());
      }
      break;
    case CHAR:
      {
        HiveCharWritable expectedWritable = (HiveCharWritable) object;
        HiveChar value = expectedWritable.getHiveChar();
        serializeWrite.writeHiveChar(value);
      }
      break;
    case VARCHAR:
      {
        HiveVarcharWritable expectedWritable = (HiveVarcharWritable) object;
        HiveVarchar value = expectedWritable.getHiveVarchar();
        serializeWrite.writeHiveVarchar(value);
      }
      break;
    case BINARY:
      {
        BytesWritable expectedWritable = (BytesWritable) object;
        byte[] bytes = expectedWritable.getBytes();
        int length = expectedWritable.getLength();
        serializeWrite.writeBinary(bytes, 0, length);
      }
      break;
    case TIMESTAMP:
      {
        TimestampWritable expectedWritable = (TimestampWritable) object;
        Timestamp value = expectedWritable.getTimestamp();
        serializeWrite.writeTimestamp(value);
      }
      break;
    case INTERVAL_YEAR_MONTH:
      {
        HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) object;
        HiveIntervalYearMonth value = expectedWritable.getHiveIntervalYearMonth();
        serializeWrite.writeHiveIntervalYearMonth(value);
      }
      break;
    case INTERVAL_DAY_TIME:
      {
        HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) object;
        HiveIntervalDayTime value = expectedWritable.getHiveIntervalDayTime();
        serializeWrite.writeHiveIntervalDayTime(value);
      }
      break;
    case DECIMAL:
      {
        HiveDecimalWritable expectedWritable = (HiveDecimalWritable) object;
        HiveDecimal value = expectedWritable.getHiveDecimal();
        serializeWrite.writeHiveDecimal(value, ((DecimalTypeInfo) primitiveTypeInfos[i]).scale());
      }
      break;
    default:
      throw new HiveException("Unexpected primitive category " + primitiveCategory);
    }
  }
  return output;
}
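Taken together, this method shows the whole SerializeWrite contract: bind an Output with set(), then write each column's value in order. Below is a minimal round-trip sketch using the same writer APIs; on the read side, readNextField() and the currentInt/currentBytes/currentBytesStart/currentBytesLength result fields are assumptions about the Hive 2.x DeserializeRead these tests target, so treat that half as illustrative.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SerializeWriteRoundTrip {
  public static void main(String[] args) throws Exception {
    PrimitiveTypeInfo[] typeInfos = new PrimitiveTypeInfo[] {
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo };
    // Serialize one row: (42, "hello").
    LazyBinarySerializeWrite serializeWrite = new LazyBinarySerializeWrite(typeInfos.length);
    Output output = new Output();
    serializeWrite.set(output);
    serializeWrite.writeInt(42);
    byte[] str = "hello".getBytes(StandardCharsets.UTF_8);
    serializeWrite.writeString(str, 0, str.length);
    // Read the fields back in the same column order.
    LazyBinaryDeserializeRead deserializeRead =
        new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
    deserializeRead.set(output.getData(), 0, output.getLength());
    if (deserializeRead.readNextField()) {
      System.out.println(deserializeRead.currentInt); // 42
    }
    if (deserializeRead.readNextField()) {
      System.out.println(new String(deserializeRead.currentBytes,
          deserializeRead.currentBytesStart, deserializeRead.currentBytesLength,
          StandardCharsets.UTF_8)); // hello
    }
  }
}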
Use of org.apache.hadoop.hive.serde2.fast.SerializeWrite in project hive by apache.
The class TestVectorSerDeRow, method testVectorSerializeRow.
void testVectorSerializeRow(Random r, SerializationType serializationType)
    throws HiveException, IOException, SerDeException {
  String[] emptyScratchTypeNames = new String[0];
  VectorRandomRowSource source = new VectorRandomRowSource();
  source.init(r);
  VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
  batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
  VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
  VectorAssignRow vectorAssignRow = new VectorAssignRow();
  vectorAssignRow.init(source.typeNames());
  int fieldCount = source.typeNames().size();
  DeserializeRead deserializeRead;
  SerializeWrite serializeWrite;
  switch (serializationType) {
  case BINARY_SORTABLE:
    deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(),
        /* useExternalBuffer */ false);
    serializeWrite = new BinarySortableSerializeWrite(fieldCount);
    break;
  case LAZY_BINARY:
    deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(),
        /* useExternalBuffer */ false);
    serializeWrite = new LazyBinarySerializeWrite(fieldCount);
    break;
  case LAZY_SIMPLE:
    {
      StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
      LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector);
      byte separator = (byte) '\t';
      deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(),
          /* useExternalBuffer */ false, separator, lazySerDeParams);
      serializeWrite = new LazySimpleSerializeWrite(fieldCount, separator, lazySerDeParams);
    }
    break;
  default:
    throw new Error("Unknown serialization type " + serializationType);
  }
  VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite);
  vectorSerializeRow.init(source.typeNames());
  Object[][] randomRows = source.randomRows(100000);
  int firstRandomRowIndex = 0;
  for (int i = 0; i < randomRows.length; i++) {
    Object[] row = randomRows[i];
    vectorAssignRow.assignRow(batch, batch.size, row);
    batch.size++;
    if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
      serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows,
          firstRandomRowIndex);
      firstRandomRowIndex = i + 1;
      batch.reset();
    }
  }
  if (batch.size > 0) {
    serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows,
        firstRandomRowIndex);
  }
}
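The getSerDeParams(rowObjectInspector) helper used in the LAZY_SIMPLE branch is not shown on this page. A plausible way to construct the LazySerDeParameters it returns is sketched below; the column names and types are made up for illustration, and the actual helper in the test class may be shaped differently.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class SerDeParamsSketch {
  // Hypothetical stand-in for the getSerDeParams(rowObjectInspector) helper above.
  static LazySerDeParameters makeSerDeParams() throws SerDeException {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "c0,c1");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    return new LazySerDeParameters(new Configuration(), tbl, LazySimpleSerDe.class.getName());
  }
}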
Use of org.apache.hadoop.hive.serde2.fast.SerializeWrite in project hive by apache.
The class TestLazyBinaryFast, method testLazyBinaryFast.
private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows,
    AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer,
    StructObjectInspector writeRowOI, PrimitiveTypeInfo[] primitiveTypeInfos,
    boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
  int rowCount = rows.length;
  int columnCount = primitiveTypeInfos.length;
  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }
  int writeColumnCount = columnCount;
  PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
  }
  LazyBinarySerializeWrite lazyBinarySerializeWrite =
      new LazyBinarySerializeWrite(writeColumnCount);
  // Try to serialize
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    lazyBinarySerializeWrite.set(output);
    for (int index = 0; index < writeColumnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
  }
  // Try to deserialize
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // Specifying the right type info length tells LazyBinaryDeserializeRead which is the
    // last column.
    LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
        new LazyBinaryDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false);
    BytesWritable bytesWritable = serializeWriteBytes[i];
    lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazyBinaryDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as null.
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
    }
  }
  // Use the SerDe class to deserialize the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];
    LazyBinaryStruct lazyBinaryStruct;
    if (doWriteFewerColumns) {
      lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
    } else {
      lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
    }
    Object[] row = rows[i];
    for (int index = 0; index < writeColumnCount; index++) {
      PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
      Writable writable = (Writable) row[index];
      Object object = lazyBinaryStruct.getField(index);
      if (writable == null || object == null) {
        if (writable != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else {
        if (!object.equals(writable)) {
          fail("SerDe deserialized value does not match");
        }
      }
    }
  }
  // One Writable per row.
  BytesWritable[] serdeBytes = new BytesWritable[rowCount];
  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  Object[] serdeRow = new Object[writeColumnCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // LazyBinary seems to work better with a row object array instead of a Java object...
    for (int index = 0; index < writeColumnCount; index++) {
      serdeRow[index] = row[index];
    }
    BytesWritable serialized;
    if (doWriteFewerColumns) {
      serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
    } else {
      serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
    }
    BytesWritable bytesWritable =
        new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
    byte[] bytes1 = bytesWritable.getBytes();
    BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
    byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0,
        lazySerializedWriteBytes.getLength());
    if (bytes1.length != bytes2.length) {
      fail("SerializeWrite length " + bytes2.length + " and SerDe serialization length "
          + bytes1.length + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
    }
    if (!Arrays.equals(bytes1, bytes2)) {
      fail("SerializeWrite and SerDe serialization do not match ("
          + Arrays.toString(primitiveTypeInfos) + ")");
    }
    serdeBytes[i] = bytesWritable;
  }
  // Use DeserializeRead to deserialize the Writable row objects created by the SerDe.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // When doWriteFewerColumns, try to read more fields than exist in the buffer.
    LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
        new LazyBinaryDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false);
    BytesWritable bytesWritable = serdeBytes[i];
    lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazyBinaryDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as null.
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
    }
  }
}
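The doWriteFewerColumns path leans on a convention of the fast deserializers: asking for a field past the end of the written data reports it as null rather than throwing. The sketch below reads with a wider schema than was written; the names allTypeInfos, buf, and bufLength are hypothetical, and readNextField() returning false on a null field or exhausted input is an assumption about this Hive version's DeserializeRead.

import java.io.IOException;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

public class ReadWiderThanWritten {
  // allTypeInfos may list more columns than were actually serialized into buf.
  static void readAll(TypeInfo[] allTypeInfos, byte[] buf, int bufLength) throws IOException {
    LazyBinaryDeserializeRead wideRead =
        new LazyBinaryDeserializeRead(allTypeInfos, /* useExternalBuffer */ false);
    wideRead.set(buf, 0, bufLength);
    for (int index = 0; index < allTypeInfos.length; index++) {
      if (!wideRead.readNextField()) {
        // Null field, or the buffer is already exhausted: the column reads as NULL,
        // which is what the index >= writeColumnCount branch above verifies.
      } else {
        // Consume the typed result (wideRead.currentInt, wideRead.currentBytes, ...).
      }
    }
  }
}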
Use of org.apache.hadoop.hive.serde2.fast.SerializeWrite in project hive by apache.
The class TestLazySimpleFast, method testLazySimpleFast.
private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows,
    LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer,
    StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams,
    LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos,
    boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
  int rowCount = rows.length;
  int columnCount = primitiveTypeInfos.length;
  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }
  int writeColumnCount = columnCount;
  PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
  }
  // Try to serialize
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    LazySimpleSerializeWrite lazySimpleSerializeWrite =
        new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
    lazySimpleSerializeWrite.set(output);
    for (int index = 0; index < columnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
  }
  // Try to deserialize
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    LazySimpleDeserializeRead lazySimpleDeserializeRead =
        new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false,
            separator, serdeParams);
    BytesWritable bytesWritable = serializeWriteBytes[i];
    byte[] bytes = bytesWritable.getBytes();
    int length = bytesWritable.getLength();
    lazySimpleDeserializeRead.set(bytes, 0, length);
    // Char view of the serialized bytes (handy in a debugger; not otherwise used).
    char[] chars = new char[length];
    for (int c = 0; c < chars.length; c++) {
      chars[c] = (char) (bytes[c] & 0xFF);
    }
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazySimpleDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as null.
        VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
    }
  }
  // Use the SerDe class to deserialize the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];
    LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
    Object[] row = rows[i];
    for (int index = 0; index < columnCount; index++) {
      PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
      Writable writable = (Writable) row[index];
      LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
      Object object;
      if (lazyPrimitive != null) {
        object = lazyPrimitive.getWritableObject();
      } else {
        object = null;
      }
      if (writable == null || object == null) {
        if (writable != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else {
        if (!object.equals(writable)) {
          fail("SerDe deserialized value does not match");
        }
      }
    }
  }
  // One Writable per row.
  byte[][] serdeBytes = new byte[rowCount][];
  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  Object[] serdeRow = new Object[columnCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // LazySimple seems to work better with a row object array instead of a Java object...
    for (int index = 0; index < columnCount; index++) {
      serdeRow[index] = row[index];
    }
    Text serialized = (Text) serde.serialize(serdeRow, rowOI);
    byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
    byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0,
        serializeWriteBytes[i].getLength());
    if (!Arrays.equals(bytes1, bytes2)) {
      fail("SerializeWrite and SerDe serialization do not match");
    }
    serdeBytes[i] = copyBytes(serialized);
  }
  // Use DeserializeRead to deserialize the Writable row objects created by the SerDe.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    LazySimpleDeserializeRead lazySimpleDeserializeRead =
        new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false,
            separator, serdeParams);
    byte[] bytes = serdeBytes[i];
    lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazySimpleDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as null.
        VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
    }
  }
}
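Unlike LazyBinary, the LAZY_SIMPLE encoding is plain separator-delimited text, which is why the test can compare the SerDe's Text output byte-for-byte against SerializeWrite's buffer. A small helper for eyeballing that encoding is sketched below; it assumes the ByteStream.Output was filled by LazySimpleSerializeWrite as above, and that the data is UTF-8 text.

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.ByteStream.Output;

public class InspectLazySimpleBytes {
  // Render the serialized row as a String for inspection.
  static String textForm(Output output) {
    byte[] data = Arrays.copyOf(output.getData(), output.getLength());
    // For an (int, string) row with a '\t' separator this looks like "42\thello".
    return new String(data, StandardCharsets.UTF_8);
  }
}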
Use of org.apache.hadoop.hive.serde2.fast.SerializeWrite in project hive by apache.
The class TestVectorMapJoinFastRowHashMap, method addAndVerifyRows.
private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows,
    VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType,
    VerifyFastRowHashMap verifyTable, String[] keyTypeNames, boolean doClipping,
    boolean useExactBytes) throws HiveException, IOException, SerDeException {
  final int keyCount = keyTypeNames.length;
  PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
  PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
  ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList =
      new ArrayList<ObjectInspector>(keyCount);
  for (int i = 0; i < keyCount; i++) {
    PrimitiveTypeInfo primitiveTypeInfo =
        (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
    keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
    PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
    keyPrimitiveCategories[i] = primitiveCategory;
    keyPrimitiveObjectInspectorList.add(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
  }
  boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
  Arrays.fill(keyColumnSortOrderIsDesc, false);
  byte[] keyColumnNullMarker = new byte[keyCount];
  Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
  byte[] keyColumnNotNullMarker = new byte[keyCount];
  Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
  BinarySortableSerializeWrite keySerializeWrite = new BinarySortableSerializeWrite(
      keyColumnSortOrderIsDesc, keyColumnNullMarker, keyColumnNotNullMarker);
  PrimitiveTypeInfo[] valuePrimitiveTypeInfos = valueSource.primitiveTypeInfos();
  final int columnCount = valuePrimitiveTypeInfos.length;
  SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
  final int count = rows.length;
  for (int i = 0; i < count; i++) {
    Object[] valueRow = rows[i];
    // Serialize valueRow into value bytes.
    Output valueOutput = new Output();
    ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
    for (int index = 0; index < columnCount; index++) {
      Writable writable = (Writable) valueRow[index];
      VerifyFastRow.serializeWrite(valueSerializeWrite, valuePrimitiveTypeInfos[index], writable);
    }
    byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
    // Add a new key, or add a value to an existing key?
    byte[] key;
    if (random.nextBoolean() || verifyTable.getCount() == 0) {
      // Serialize a fresh random keyRow into key bytes.
      Object[] keyRow = VectorRandomRowSource.randomRow(keyCount, random,
          keyPrimitiveObjectInspectorList, keyPrimitiveCategories, keyPrimitiveTypeInfos);
      Output keyOutput = new Output();
      keySerializeWrite.set(keyOutput);
      for (int index = 0; index < keyCount; index++) {
        Writable writable = (Writable) keyRow[index];
        VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], writable);
      }
      key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
      verifyTable.add(key, keyRow, value, valueRow);
    } else {
      key = verifyTable.addRandomExisting(value, valueRow, random);
    }
    // Wrap the key and value bytes and insert into the hash table.
    BytesWritable keyWritable = new BytesWritable(key);
    BytesWritable valueWritable = new BytesWritable(value);
    map.putRow(keyWritable, valueWritable);
    // verifyTable.verify(map);
  }
  verifyTable.verify(map, hashTableKeyType, valuePrimitiveTypeInfos, doClipping,
      useExactBytes, random);
}
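Keys go through BinarySortableSerializeWrite because the hash table compares raw key bytes, so equal keys must serialize to identical, order-preserving byte strings; values use the more compact LazyBinary encoding. A probe key for a later lookup would be built the same way. The sketch below uses the simpler ascending-order constructor seen earlier on this page; the class and method names are illustrative.

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;

public class BuildProbeKey {
  // Serialize a single-column long key the same way the table's keys were built.
  static byte[] probeKey(long keyValue) throws IOException {
    BinarySortableSerializeWrite keyWrite = new BinarySortableSerializeWrite(1);
    Output keyOutput = new Output();
    keyWrite.set(keyOutput);
    keyWrite.writeLong(keyValue);
    // Equal long values yield identical bytes, so raw-byte equality serves as key equality.
    return Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
  }
}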