use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead in project hive by apache.
the class TestVectorSerDeRow method testVectorSerializeRow.
/**
 * Drives {@code VectorSerializeRow} for one serialization format using random rows.
 *
 * <p>Rows produced by a {@code VectorRandomRowSource} are assigned into a vectorized row batch.
 * Each time the batch reaches {@code VectorizedRowBatch.DEFAULT_SIZE} rows, and once more for any
 * remainder, the batch is handed to {@code serializeBatch} along with the matching
 * {@code DeserializeRead}, the expected rows and the index of the first expected row in the batch.
 * (Presumably {@code serializeBatch} serializes each batch row and verifies it by reading it
 * back; its body is not part of this snippet.)
 *
 * @param r random generator used to initialize the random row source
 * @param serializationType selects the SerializeWrite / DeserializeRead pair under test
 * @throws Error if {@code serializationType} is not one of the handled formats
 */
void testVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  rowSource.init(r);

  // The batch context is initialized with an empty list of scratch column type names.
  VectorizedRowBatchCtx batchCtx = new VectorizedRowBatchCtx();
  batchCtx.init(rowSource.rowStructObjectInspector(), new String[0]);
  VectorizedRowBatch rowBatch = batchCtx.createVectorizedRowBatch();

  VectorAssignRow rowAssigner = new VectorAssignRow();
  rowAssigner.init(rowSource.typeNames());

  int columnCount = rowSource.typeNames().size();

  // Pick the reader/writer pair for the requested format.
  DeserializeRead reader;
  SerializeWrite writer;
  switch (serializationType) {
    case BINARY_SORTABLE:
      reader = new BinarySortableDeserializeRead(rowSource.primitiveTypeInfos(), /* useExternalBuffer */ false);
      writer = new BinarySortableSerializeWrite(columnCount);
      break;
    case LAZY_BINARY:
      reader = new LazyBinaryDeserializeRead(rowSource.primitiveTypeInfos(), /* useExternalBuffer */ false);
      writer = new LazyBinarySerializeWrite(columnCount);
      break;
    case LAZY_SIMPLE: {
      StructObjectInspector rowInspector = rowSource.rowStructObjectInspector();
      LazySerDeParameters lazyParams = getSerDeParams(rowInspector);
      byte fieldSeparator = (byte) '\t';
      reader = new LazySimpleDeserializeRead(rowSource.primitiveTypeInfos(), /* useExternalBuffer */ false, fieldSeparator, lazyParams);
      writer = new LazySimpleSerializeWrite(columnCount, fieldSeparator, lazyParams);
      break;
    }
    default:
      throw new Error("Unknown serialization type " + serializationType);
  }

  VectorSerializeRow rowSerializer = new VectorSerializeRow(writer);
  rowSerializer.init(rowSource.typeNames());

  Object[][] expectedRows = rowSource.randomRows(100000);
  int batchStartRow = 0;   // index in expectedRows of the first row held by the current batch
  int rowsConsumed = 0;    // number of expected rows assigned so far
  for (Object[] expectedRow : expectedRows) {
    rowAssigner.assignRow(rowBatch, rowBatch.size, expectedRow);
    rowBatch.size++;
    rowsConsumed++;
    if (rowBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
      // Batch is full: check it, then start the next batch at the following row.
      serializeBatch(rowBatch, rowSerializer, reader, rowSource, expectedRows, batchStartRow);
      batchStartRow = rowsConsumed;
      rowBatch.reset();
    }
  }

  // Check whatever is left in a partially filled final batch.
  if (rowBatch.size > 0) {
    serializeBatch(rowBatch, rowSerializer, reader, rowSource, expectedRows, batchStartRow);
  }
}
use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead in project hive by apache.
the class ReduceRecordSource method init.
/**
 * Prepares this record source for reading reduce-side key/value input.
 *
 * <p>The key and value SerDes are instantiated from their table descriptors and initialized with
 * the descriptors' properties. In vectorized mode both object inspectors are treated as struct
 * inspectors, a combined row inspector and a row batch are created, and vectorized deserializers
 * are set up for the key and, when there are value columns, for the value. Otherwise the row
 * inspector is a standard struct built from the key and value inspectors.
 *
 * @param jconf job configuration (not referenced by this method body)
 * @param reducer operator stored as the reducer for this source
 * @param vectorized whether to set up vectorized deserialization
 * @param keyTableDesc descriptor supplying the key SerDe class and properties
 * @param valueTableDesc descriptor supplying the value SerDe class and properties
 * @param reader a {@code KeyValueReader}, or otherwise a {@code KeyValuesReader}
 * @param handleGroupKey stored as-is on this source
 * @param tag stored as-is on this source
 * @param batchContext creates the row batch and supplies the data type physical variations
 * @param vectorizedVertexNum stored as-is on this source
 * @param vectorizedTestingReducerBatchSize requested batch size, capped at
 *        {@code VectorizedRowBatch.DEFAULT_SIZE}
 * @throws OutOfMemoryError rethrown unchanged if raised during initialization
 * @throws RuntimeException wrapping any other failure raised during initialization
 */
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum, int vectorizedTestingReducerBatchSize) throws Exception {
  this.vectorizedVertexNum = vectorizedVertexNum;

  // For now, we don't go higher than the default batch size unless we do more work
  // to verify every vectorized operator downstream can handle a larger batch size.
  this.vectorizedTestingReducerBatchSize =
      Math.min(vectorizedTestingReducerBatchSize, VectorizedRowBatch.DEFAULT_SIZE);

  this.reducer = reducer;
  this.vectorized = vectorized;
  this.keyTableDesc = keyTableDesc;

  // Wrap the reader in the adapter that matches its concrete kind.
  if (reader instanceof KeyValueReader) {
    this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
  } else {
    this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
  }

  this.handleGroupKey = handleGroupKey;
  this.tag = tag;

  try {
    inputKeySerDe = ReflectionUtils.newInstance(keyTableDesc.getSerDeClass(), null);
    inputKeySerDe.initialize(null, keyTableDesc.getProperties(), null);
    ObjectInspector keyInspector = inputKeySerDe.getObjectInspector();
    if (vectorized) {
      keyStructInspector = (StructObjectInspector) keyInspector;
      // Value columns start right after the key columns.
      firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
    }

    // We should initialize the SerDe with the TypeInfo when available.
    this.valueTableDesc = valueTableDesc;
    inputValueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getSerDeClass(), null);
    inputValueSerDe.initialize(null, valueTableDesc.getProperties(), null);
    valueObjectInspector = inputValueSerDe.getObjectInspector();

    if (!vectorized) {
      // Row mode: the row is a struct of the key inspector followed by the value inspector.
      ArrayList<ObjectInspector> keyAndValueInspectors = new ArrayList<ObjectInspector>();
      keyAndValueInspectors.add(keyInspector);
      keyAndValueInspectors.add(valueObjectInspector);
      rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
          Utilities.reduceFieldNameList, keyAndValueInspectors);
    } else {
      /* vectorization only works with struct object inspectors */
      valueStructInspectors = (StructObjectInspector) valueObjectInspector;
      final int totalColumns =
          firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
      rowObjectInspector =
          Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
      batch = batchContext.createVectorizedRowBatch();

      // Setup vectorized deserialization for the key and value.
      BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeySerDe;
      // Key physical variations: the leading key-column slice when the array is longer than
      // the key column count, otherwise the whole array.
      BinarySortableDeserializeRead keyRead = new BinarySortableDeserializeRead(
          VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
          (batchContext.getRowdataTypePhysicalVariations().length > firstValueColumnOffset)
              ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), 0, firstValueColumnOffset)
              : batchContext.getRowdataTypePhysicalVariations(),
          /* useExternalBuffer */ true,
          binarySortableSerDe.getSortOrders(),
          binarySortableSerDe.getNullMarkers(),
          binarySortableSerDe.getNotNullMarkers());
      keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(keyRead);
      keyBinarySortableDeserializeToRow.init(0);

      final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
      if (valuesSize > 0) {
        // Value physical variations: the value-column slice when the array covers all
        // columns, otherwise null.
        LazyBinaryDeserializeRead valueRead = new LazyBinaryDeserializeRead(
            VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
            (batchContext.getRowdataTypePhysicalVariations().length >= totalColumns)
                ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), firstValueColumnOffset, totalColumns)
                : null,
            /* useExternalBuffer */ true);
        valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(valueRead);
        valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);

        // Create data buffers for value bytes column vectors.
        for (int col = firstValueColumnOffset; col < batch.numCols; col++) {
          if (batch.cols[col] instanceof BytesColumnVector) {
            ((BytesColumnVector) batch.cols[col]).initBuffer();
          }
        }
      }
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    }
    throw new RuntimeException("Reduce operator initialization failed", e);
  }
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead in project hive by apache.
the class TestVectorSerDeRow method innerTestVectorDeserializeRow.
/**
 * Serializes random rows one at a time and deserializes each into a vectorized row batch,
 * handing every full (and the final partial) batch to {@code examineBatch} for comparison
 * against the expected rows.
 *
 * <p>The random row source is initialized with all supported types, with NULLs and Unicode
 * switched off.
 *
 * @param r random generator for the row source and, when requested, per-column sort orders
 * @param iteration iteration number, used only in the title passed to {@code examineBatch}
 * @param serializationType selects the SerializeWrite / DeserializeRead pair under test
 * @param alternate1 for BINARY_SORTABLE: use random per-column sort orders with explicit
 *        null markers; for LAZY_SIMPLE: configure quote and escape characters
 * @param alternate2 for BINARY_SORTABLE: call {@code addBinarySortableAlphabets()} on the row
 *        source; not referenced for the other formats
 * @param useExternalBuffer passed through to the DeserializeRead implementation
 * @throws HiveException wrapping any exception raised while deserializing a row, with the
 *         detailed read position in the message
 * @throws Error if {@code serializationType} is not one of the handled formats
 */
void innerTestVectorDeserializeRow(Random r, int iteration, SerializationType serializationType, boolean alternate1, boolean alternate2, boolean useExternalBuffer) throws HiveException, IOException, SerDeException {
  final String title = "serializationType: " + serializationType + ", iteration " + iteration;

  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  // FUTURE: try NULLs and UNICODE.
  rowSource.init(r, VectorRandomRowSource.SupportedTypes.ALL, 4, /* allowNulls */ false, /* isUnicodeOk */ false);

  VectorizedRowBatchCtx batchCtx = new VectorizedRowBatchCtx();
  batchCtx.init(rowSource.rowStructObjectInspector(), new String[0]);
  VectorizedRowBatch rowBatch = batchCtx.createVectorizedRowBatch();

  // junk the destination for the 1st pass
  for (int c = 0; c < rowBatch.cols.length; c++) {
    Arrays.fill(rowBatch.cols[c].isNull, true);
  }

  TypeInfo[] typeInfos = rowSource.typeInfos();
  int columnCount = rowSource.typeNames().size();

  DeserializeRead reader;
  SerializeWrite writer;
  switch (serializationType) {
    case BINARY_SORTABLE:
      if (alternate1) {
        // Random sort order per column. The same null / not-null markers are used for both
        // directions; the original notes these as nulls-last for descending columns and
        // nulls-first for ascending columns.
        boolean[] sortOrderIsDesc = new boolean[columnCount];
        byte[] nullMarkers = new byte[columnCount];
        byte[] notNullMarkers = new byte[columnCount];
        for (int c = 0; c < columnCount; c++) {
          sortOrderIsDesc[c] = r.nextBoolean();
          nullMarkers[c] = BinarySortableSerDe.ZERO;
          notNullMarkers[c] = BinarySortableSerDe.ONE;
        }
        writer = new BinarySortableSerializeWrite(sortOrderIsDesc, nullMarkers, notNullMarkers);
        reader = new BinarySortableDeserializeRead(rowSource.typeInfos(), useExternalBuffer, sortOrderIsDesc, nullMarkers, notNullMarkers);
      } else {
        reader = BinarySortableDeserializeRead.ascendingNullsFirst(rowSource.typeInfos(), useExternalBuffer);
        writer = new BinarySortableSerializeWrite(columnCount);
      }
      if (alternate2) {
        rowSource.addBinarySortableAlphabets();
      }
      break;
    case LAZY_BINARY:
      reader = new LazyBinaryDeserializeRead(rowSource.typeInfos(), useExternalBuffer);
      writer = new LazyBinarySerializeWrite(columnCount);
      break;
    case LAZY_SIMPLE: {
      StructObjectInspector rowInspector = rowSource.rowStructObjectInspector();
      Configuration conf = new Configuration();
      Properties tableProps = new Properties();
      tableProps.setProperty(serdeConstants.FIELD_DELIM, "\t");
      tableProps.setProperty(serdeConstants.LINE_DELIM, "\n");
      if (alternate1) {
        // Escaping variant: the properties must be set before the SerDe parameters are built.
        tableProps.setProperty(serdeConstants.QUOTE_CHAR, "'");
        tableProps.setProperty(serdeConstants.ESCAPE_CHAR, "\\");
      }
      LazySerDeParameters lazyParams = getSerDeParams(conf, tableProps, rowInspector, new byte[] { (byte) '\t' });
      if (alternate1) {
        // LazySimple seems to throw away everything but \n and \r.
        boolean[] needsEscape = lazyParams.getNeedsEscape();
        StringBuilder escapables = new StringBuilder();
        if (needsEscape['\n']) {
          escapables.append('\n');
        }
        if (needsEscape['\r']) {
          escapables.append('\r');
        }
        if (escapables.length() > 0) {
          rowSource.addEscapables(escapables.toString());
        }
      }
      reader = new LazySimpleDeserializeRead(rowSource.typeInfos(), useExternalBuffer, lazyParams);
      writer = new LazySimpleSerializeWrite(columnCount, lazyParams);
      break;
    }
    default:
      throw new Error("Unknown serialization type " + serializationType);
  }

  VectorDeserializeRow vectorDeserializeRow = new VectorDeserializeRow(reader);
  vectorDeserializeRow.init();

  // junk the destination for the 1st pass
  for (ColumnVector column : rowBatch.cols) {
    Arrays.fill(column.isNull, true);
    column.noNulls = false;
  }

  VectorExtractRow rowExtractor = new VectorExtractRow();
  rowExtractor.init(rowSource.typeNames());

  Object[][] expectedRows = rowSource.randomRows(2000);
  int batchStartRow = 0;   // index in expectedRows of the first row held by the current batch
  int rowsConsumed = 0;    // number of expected rows deserialized so far
  for (Object[] expectedRow : expectedRows) {
    Output serialized = serializeRow(expectedRow, rowSource, writer);
    vectorDeserializeRow.setBytes(serialized.getData(), 0, serialized.getLength());
    try {
      vectorDeserializeRow.deserialize(rowBatch, rowBatch.size);
    } catch (Exception e) {
      throw new HiveException("\nDeserializeRead details: " + vectorDeserializeRow.getDetailedReadPositionString(), e);
    }
    rowBatch.size++;
    rowsConsumed++;
    if (rowBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
      // Batch is full: compare it, then start the next batch at the following row.
      examineBatch(rowBatch, rowExtractor, typeInfos, expectedRows, batchStartRow, title);
      batchStartRow = rowsConsumed;
      rowBatch.reset();
    }
  }

  // Compare whatever is left in a partially filled final batch.
  if (rowBatch.size > 0) {
    examineBatch(rowBatch, rowExtractor, typeInfos, expectedRows, batchStartRow, title);
  }
}
use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead in project hive by apache.
the class TestBinarySortableFast method testBinarySortableFast.
/**
 * Cross-checks the fast BinarySortable SerializeWrite / DeserializeRead classes against the
 * regular SerDe for a set of rows.
 *
 * <p>Phases, in order:
 * <ol>
 *   <li>Serialize every row with {@code BinarySortableSerializeWrite} and check that consecutive
 *       serialized rows compare in the direction given by {@code ascending}.</li>
 *   <li>Read those bytes back with {@code BinarySortableDeserializeRead}, then repeat with the
 *       input truncated by one byte and expect an {@code EOFException} on the last written
 *       field (unless that field's value is null).</li>
 *   <li>Deserialize the same bytes with the SerDe and compare each written column.</li>
 *   <li>Serialize every row with the SerDe and require byte-for-byte equality with the
 *       SerializeWrite output, reporting per-field diagnostics on mismatch.</li>
 *   <li>Read the SerDe-produced bytes back with {@code BinarySortableDeserializeRead}.</li>
 * </ol>
 *
 * @param source random row source (not referenced by this method body)
 * @param rows rows to serialize; consecutive rows are expected to serialize in an order
 *        consistent with {@code ascending}
 * @param columnSortOrderIsDesc per-column sort direction passed to the fast writer and readers
 * @param columnNullMarker per-column null marker passed to the fast writer and readers
 * @param columnNotNullMarker per-column not-null marker passed to the fast writer and readers
 * @param serde SerDe used when all columns are written
 * @param rowOI object inspector passed to {@code serialize} for both SerDes
 * @param serde_fewer SerDe used when {@code doWriteFewerColumns} is true
 * @param writeRowOI supplies the written column count when {@code doWriteFewerColumns} is true
 * @param ascending expected comparison direction of consecutive serialized rows
 * @param typeInfos type of each column; its length is the full column count
 * @param useIncludeColumns when true, a random subset of columns is read and the rest skipped
 * @param doWriteFewerColumns when true, only the first {@code writeRowOI} field-count columns
 *        are written
 * @param r random generator used to pick the included columns
 * @throws Throwable any failure raised by the serialization, deserialization or assertions
 */
private void testBinarySortableFast(SerdeRandomRowSource source, Object[][] rows, boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, boolean ascending, TypeInfo[] typeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
int rowCount = rows.length;
int columnCount = typeInfos.length;
// Optional random include mask; null means every column is read.
boolean[] columnsToInclude = null;
if (useIncludeColumns) {
columnsToInclude = new boolean[columnCount];
for (int i = 0; i < columnCount; i++) {
columnsToInclude[i] = r.nextBoolean();
}
}
// Number of leading columns actually written; fewer than columnCount when requested.
int writeColumnCount = columnCount;
if (doWriteFewerColumns) {
writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
}
BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
// Try to serialize
// One Writable per row.
BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
// For each row, the cumulative output length after each written field (used for diagnostics).
int[][] perFieldWriteLengthsArray = new int[rowCount][];
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
Output output = new Output();
binarySortableSerializeWrite.set(output);
int[] perFieldWriteLengths = new int[columnCount];
for (int index = 0; index < writeColumnCount; index++) {
VerifyFast.serializeWrite(binarySortableSerializeWrite, typeInfos[index], row[index]);
perFieldWriteLengths[index] = output.getLength();
}
perFieldWriteLengthsArray[i] = perFieldWriteLengths;
BytesWritable bytesWritable = new BytesWritable();
bytesWritable.set(output.getData(), 0, output.getLength());
serializeWriteBytes[i] = bytesWritable;
// Consecutive serialized rows must not compare in the direction opposite to `ascending`.
if (i > 0) {
BytesWritable previousBytesWritable = serializeWriteBytes[i - 1];
int compareResult = previousBytesWritable.compareTo(bytesWritable);
if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
System.out.println("Test failed in " + (ascending ? "ascending" : "descending") + " order with " + (i - 1) + " and " + i);
System.out.println("serialized data [" + (i - 1) + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
System.out.println("serialized data [" + i + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
}
}
}
// Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite.
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */
false, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
BytesWritable bytesWritable = serializeWriteBytes[i];
binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
for (int index = 0; index < columnCount; index++) {
if (useIncludeColumns && !columnsToInclude[index]) {
binarySortableDeserializeRead.skipNextField();
} else if (index >= writeColumnCount) {
// Should come back a null.
VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, typeInfos[index], null);
} else {
verifyRead(binarySortableDeserializeRead, typeInfos[index], row[index]);
}
}
// The end-of-input check only applies when every column was written.
if (writeColumnCount == columnCount) {
assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
}
/*
 * Clip off one byte and expect to get an EOFException on the last written field.
 */
BinarySortableDeserializeRead binarySortableDeserializeRead2 = new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */
false, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
binarySortableDeserializeRead2.set(bytesWritable.getBytes(), 0, // One fewer byte.
bytesWritable.getLength() - 1);
for (int index = 0; index < writeColumnCount; index++) {
if (index == writeColumnCount - 1) {
// The last written field is always read here, regardless of the include mask.
boolean threw = false;
try {
verifyRead(binarySortableDeserializeRead2, typeInfos[index], row[index]);
} catch (EOFException e) {
// debugDetailedReadPositionString = binarySortableDeserializeRead2.getDetailedReadPositionString();
// debugStackTrace = e.getStackTrace();
threw = true;
}
// A missing EOFException is only tolerated when the expected value is null.
if (!threw && row[index] != null) {
Assert.fail();
}
} else {
if (useIncludeColumns && !columnsToInclude[index]) {
binarySortableDeserializeRead2.skipNextField();
} else {
verifyRead(binarySortableDeserializeRead2, typeInfos[index], row[index]);
}
}
}
}
// Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
for (int i = 0; i < rowCount; i++) {
BytesWritable bytesWritable = serializeWriteBytes[i];
// Note that regular SerDe doesn't tolerate fewer columns.
List<Object> deserializedRow;
if (doWriteFewerColumns) {
deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
} else {
deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
}
Object[] row = rows[i];
for (int index = 0; index < writeColumnCount; index++) {
Object expected = row[index];
Object object = deserializedRow.get(index);
// Either both values are null, or both are non-null and equal.
if (expected == null || object == null) {
if (expected != null || object != null) {
fail("SerDe deserialized NULL column mismatch");
}
} else {
if (!object.equals(expected)) {
fail("SerDe deserialized value does not match (expected " + expected.getClass().getName() + " " + expected.toString() + ", actual " + object.getClass().getName() + " " + object.toString() + ")");
}
}
}
}
// One Writable per row.
BytesWritable[] serdeBytes = new BytesWritable[rowCount];
// Serialize using the SerDe, then below deserialize using DeserializeRead.
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
// Since SerDe reuses memory, we will need to make a copy.
BytesWritable serialized;
if (doWriteFewerColumns) {
serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
} else {
serialized = (BytesWritable) serde.serialize(row, rowOI);
;
}
BytesWritable bytesWritable = new BytesWritable();
bytesWritable.set(serialized);
// Trim both buffers to their valid lengths before comparing.
byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
byte[] serializeWriteExpected = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
// Length mismatch: report the first differing position within the common prefix (-1 if none).
int mismatchPos = -1;
if (serDeOutput.length != serializeWriteExpected.length) {
for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
if (serDeOutput[b] != serializeWriteExpected[b]) {
mismatchPos = b;
break;
}
}
fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length + ", serializeWriteExpected.length " + serializeWriteExpected.length + " mismatchPos " + mismatchPos + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]));
}
// Same length (fail() is expected not to return): collect every differing byte position.
List<Integer> differentPositions = new ArrayList();
for (int b = 0; b < serDeOutput.length; b++) {
if (serDeOutput[b] != serializeWriteExpected[b]) {
differentPositions.add(b);
}
}
if (differentPositions.size() > 0) {
List<String> serializeWriteExpectedFields = new ArrayList<String>();
List<String> serDeFields = new ArrayList<String>();
// Split both byte arrays at the recorded cumulative field lengths for the failure message.
int f = 0;
int lastBegin = 0;
for (int b = 0; b < serDeOutput.length; b++) {
int writeLength = perFieldWriteLengthsArray[i][f];
if (b + 1 == writeLength) {
serializeWriteExpectedFields.add(displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
f++;
lastBegin = b + 1;
}
}
fail("SerializeWrite and SerDe serialization does not match at positions " + differentPositions.toString() + "\n(SerializeWrite: " + serializeWriteExpectedFields.toString() + "\nSerDe: " + serDeFields.toString() + "\nperFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]) + "\nprimitiveTypeInfos " + Arrays.toString(typeInfos) + "\nrow " + Arrays.toString(row));
}
}
serdeBytes[i] = bytesWritable;
}
// Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */
false, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
BytesWritable bytesWritable = serdeBytes[i];
binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
for (int index = 0; index < columnCount; index++) {
if (useIncludeColumns && !columnsToInclude[index]) {
binarySortableDeserializeRead.skipNextField();
} else if (index >= writeColumnCount) {
// Should come back a null.
verifyRead(binarySortableDeserializeRead, typeInfos[index], null);
} else {
verifyRead(binarySortableDeserializeRead, typeInfos[index], row[index]);
}
}
// The end-of-input check only applies when every column was written.
if (writeColumnCount == columnCount) {
assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
}
}
}
use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead in project hive by apache.
the class ReduceRecordSource method init.
/**
 * Prepares this record source for reading reduce-side key/value input.
 *
 * <p>The key and value deserializers are instantiated from their table descriptors and
 * initialized through {@code SerDeUtils.initializeSerDe}. In vectorized mode both object
 * inspectors are treated as struct inspectors, expression writers for the key and value columns
 * are collected, a combined row inspector and a row batch are created, and vectorized
 * deserializers are set up for the key and, when there are value columns, for the value.
 * Otherwise the row inspector is a standard struct built from the key and value inspectors.
 *
 * @param jconf job configuration (not referenced by this method body)
 * @param reducer operator stored as the reducer for this source
 * @param vectorized whether to set up vectorized deserialization
 * @param keyTableDesc descriptor supplying the key deserializer class and properties
 * @param valueTableDesc descriptor supplying the value deserializer class and properties
 * @param reader a {@code KeyValueReader}, or otherwise a {@code KeyValuesReader}
 * @param handleGroupKey stored as-is on this source
 * @param tag stored as-is on this source
 * @param batchContext creates the vectorized row batch
 * @param vectorizedVertexNum stored as-is on this source
 * @throws OutOfMemoryError rethrown unchanged if raised during initialization
 * @throws RuntimeException wrapping any other failure raised during initialization
 */
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) throws Exception {
  this.vectorizedVertexNum = vectorizedVertexNum;
  this.reducer = reducer;
  this.vectorized = vectorized;
  this.keyTableDesc = keyTableDesc;

  // Wrap the reader in the adapter that matches its concrete kind.
  if (reader instanceof KeyValueReader) {
    this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
  } else {
    this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
  }

  this.handleGroupKey = handleGroupKey;
  this.tag = tag;

  try {
    inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
    ObjectInspector keyInspector = inputKeyDeserializer.getObjectInspector();
    if (vectorized) {
      keyStructInspector = (StructObjectInspector) keyInspector;
      // Value columns start right after the key columns.
      firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
    }

    // We should initialize the SerDe with the TypeInfo when available.
    this.valueTableDesc = valueTableDesc;
    inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
    valueObjectInspector = inputValueDeserializer.getObjectInspector();

    if (!vectorized) {
      // Row mode: the row is a struct of the key inspector followed by the value inspector.
      ArrayList<ObjectInspector> keyAndValueInspectors = new ArrayList<ObjectInspector>();
      keyAndValueInspectors.add(keyInspector);
      keyAndValueInspectors.add(valueObjectInspector);
      rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
          Utilities.reduceFieldNameList, keyAndValueInspectors);
    } else {
      /* vectorization only works with struct object inspectors */
      valueStructInspectors = (StructObjectInspector) valueObjectInspector;
      final int totalColumns =
          firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();

      // Expression writers: key columns first, then value columns.
      valueStringWriters = new ArrayList<VectorExpressionWriter>(totalColumns);
      valueStringWriters.addAll(Arrays.asList(
          VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
      valueStringWriters.addAll(Arrays.asList(
          VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors)));

      rowObjectInspector =
          Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
      batch = batchContext.createVectorizedRowBatch();

      // Setup vectorized deserialization for the key and value.
      BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
      BinarySortableDeserializeRead keyRead = new BinarySortableDeserializeRead(
          VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
          /* useExternalBuffer */ true,
          binarySortableSerDe.getSortOrders());
      keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(keyRead);
      keyBinarySortableDeserializeToRow.init(0);

      final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
      if (valuesSize > 0) {
        LazyBinaryDeserializeRead valueRead = new LazyBinaryDeserializeRead(
            VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
            /* useExternalBuffer */ true);
        valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(valueRead);
        valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);

        // Create data buffers for value bytes column vectors.
        for (int col = firstValueColumnOffset; col < batch.numCols; col++) {
          if (batch.cols[col] instanceof BytesColumnVector) {
            ((BytesColumnVector) batch.cols[col]).initBuffer();
          }
        }
      }
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    }
    throw new RuntimeException("Reduce operator initialization failed", e);
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
Aggregations