Use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.
From the class TestVectorSerDeRow, method innerTestVectorSerializeRow.
void innerTestVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
String[] emptyScratchTypeNames = new String[0];
VectorRandomRowSource source = new VectorRandomRowSource();
// FUTURE: try NULLs and UNICODE.
source.init(r, VectorRandomRowSource.SupportedTypes.ALL, 4,
    /* allowNulls */ false, /* isUnicodeOk */ false);
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
VectorAssignRow vectorAssignRow = new VectorAssignRow();
vectorAssignRow.init(source.typeNames());
int fieldCount = source.typeNames().size();
DeserializeRead deserializeRead;
SerializeWrite serializeWrite;
switch(serializationType) {
case BINARY_SORTABLE:
deserializeRead = BinarySortableDeserializeRead.ascendingNullsFirst(source.typeInfos(), false);
serializeWrite = new BinarySortableSerializeWrite(fieldCount);
break;
case LAZY_BINARY:
deserializeRead = new LazyBinaryDeserializeRead(source.typeInfos(), /* useExternalBuffer */ false);
serializeWrite = new LazyBinarySerializeWrite(fieldCount);
break;
case LAZY_SIMPLE:
{
StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
// Use different separator values.
byte[] separators = new byte[] { (byte) 9, (byte) 2, (byte) 3, (byte) 4, (byte) 5, (byte) 6, (byte) 7, (byte) 8 };
LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector, separators);
deserializeRead = new LazySimpleDeserializeRead(source.typeInfos(), /* useExternalBuffer */ false, lazySerDeParams);
serializeWrite = new LazySimpleSerializeWrite(fieldCount, lazySerDeParams);
}
break;
default:
throw new Error("Unknown serialization type " + serializationType);
}
VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite);
vectorSerializeRow.init(source.typeNames());
Object[][] randomRows = source.randomRows(2000);
int firstRandomRowIndex = 0;
for (int i = 0; i < randomRows.length; i++) {
Object[] row = randomRows[i];
vectorAssignRow.assignRow(batch, batch.size, row);
batch.size++;
if (batch.size == batch.DEFAULT_SIZE) {
serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
firstRandomRowIndex = i + 1;
batch.reset();
}
}
if (batch.size > 0) {
serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
}
}
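The serializeBatch helper invoked above is not shown here, but it presumably performs the usual fast-serde round trip: each batch row is written through the SerializeWrite into a byte buffer, and the bytes are then read back field by field through the DeserializeRead and compared against the original random row. The following is a minimal sketch of that round trip for the LAZY_BINARY case, assuming a single int column; Output is org.apache.hadoop.hive.serde2.ByteStream.Output, and the variable names are illustrative rather than the test's own code.
Output output = new Output();
// Point the writer at the buffer and serialize one int field.
serializeWrite.set(output);
serializeWrite.writeInt(42);
// Read the serialized bytes back and check the field.
deserializeRead.set(output.getData(), 0, output.getLength());
if (deserializeRead.readNextField()) {
  int value = deserializeRead.currentInt;   // expected to be 42
}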
Use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.
From the class VectorMapJoinCommonOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
VectorExpression.doTransientInit(bigTableFilterExpressions, hconf);
VectorExpression.doTransientInit(bigTableKeyExpressions, hconf);
VectorExpression.doTransientInit(bigTableValueExpressions, hconf);
/*
* Get configuration parameters.
*/
overflowRepeatedThreshold = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD);
useOverflowRepeatedThreshold = (overflowRepeatedThreshold >= 0);
/*
* Create our vectorized copy row and deserialize row helper objects.
*/
if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
initializeFullOuterObjects();
}
if (smallTableValueMapping.getCount() > 0) {
smallTableValueVectorDeserializeRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(
    new LazyBinaryDeserializeRead(smallTableValueMapping.getTypeInfos(), /* useExternalBuffer */ true));
smallTableValueVectorDeserializeRow.init(smallTableValueMapping.getOutputColumns());
}
if (bigTableRetainColumnMap.length > 0) {
bigTableRetainedVectorCopy = new VectorCopyRow();
bigTableRetainedVectorCopy.init(bigTableRetainColumnMap, bigTableRetainTypeInfos);
}
if (nonOuterSmallTableKeyColumnMap.length > 0) {
nonOuterSmallTableKeyVectorCopy = new VectorCopyRow();
nonOuterSmallTableKeyVectorCopy.init(nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos);
}
if (outerSmallTableKeyMapping.getCount() > 0) {
outerSmallTableKeyVectorCopy = new VectorCopyRow();
outerSmallTableKeyVectorCopy.init(outerSmallTableKeyMapping);
}
/*
* Setup the overflow batch.
*/
overflowBatch = setupOverflowBatch();
needCommonSetup = true;
needFirstBatchSetup = true;
needHashTableSetup = true;
if (LOG.isDebugEnabled()) {
int[] currentScratchColumns = vOutContext.currentScratchColumns();
LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns));
StructObjectInspector structOutputObjectInspector = (StructObjectInspector) outputObjInspector;
List<? extends StructField> fields = structOutputObjectInspector.getAllStructFieldRefs();
int i = 0;
for (StructField field : fields) {
LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName());
i++;
}
}
}
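Note that initializeOp only constructs smallTableValueVectorDeserializeRow; the operator's probe path later uses it to expand the matched small-table value bytes into the output batch. A rough sketch of that later use, assuming a WriteBuffers.ByteSegmentRef ref obtained from a hash map lookup and an illustrative batch and batchIndex (these names are not fields of the operator shown above):
byte[] bytes = ref.getBytes();
int offset = (int) ref.getOffset();
int length = ref.getLength();
// Point the deserializer at the serialized small-table value...
smallTableValueVectorDeserializeRow.setBytes(bytes, offset, length);
// ...and fill the mapped small-table value columns of the output row.
smallTableValueVectorDeserializeRow.deserialize(batch, batchIndex);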
Use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.
From the class SparkReduceRecordHandler, method init.
@Override
@SuppressWarnings("unchecked")
public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
super.init(job, output, reporter);
rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
ObjectInspector keyObjectInspector;
ReduceWork gWork = Utilities.getReduceWork(job);
reducer = gWork.getReducer();
vectorized = gWork.getVectorMode();
// clear out any parents as reducer is the root
reducer.setParentOperators(null);
batchContext = gWork.getVectorizedRowBatchCtx();
isTagged = gWork.getNeedsTagging();
try {
keyTableDesc = gWork.getKeyDesc();
inputKeySerDe = ReflectionUtils.newInstance(keyTableDesc.getSerDeClass(), null);
inputKeySerDe.initialize(null, keyTableDesc.getProperties(), null);
keyObjectInspector = inputKeySerDe.getObjectInspector();
valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
if (vectorized) {
final int maxTags = gWork.getTagToValueDesc().size();
// CONSIDER: Cleaning up this code and eliminating the arrays. Vectorization only handles
// one operator tree.
Preconditions.checkState(maxTags == 1);
keyStructInspector = (StructObjectInspector) keyObjectInspector;
firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
buffer = new DataOutputBuffer();
}
for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
// We should initialize the SerDe with the TypeInfo when available.
valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
AbstractSerDe inputValueSerDe = ReflectionUtils.newInstance(valueTableDesc[tag].getSerDeClass(), null);
inputValueSerDe.initialize(null, valueTableDesc[tag].getProperties(), null);
inputValueDeserializer[tag] = inputValueSerDe;
valueObjectInspector[tag] = inputValueSerDe.getObjectInspector();
ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
if (vectorized) {
/* vectorization only works with struct object inspectors */
valueStructInspector = (StructObjectInspector) valueObjectInspector[tag];
final int totalColumns = firstValueColumnOffset + valueStructInspector.getAllStructFieldRefs().size();
rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspector);
batch = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch();
// Setup vectorized deserialization for the key and value.
BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeySerDe;
keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(
    new BinarySortableDeserializeRead(
        VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
        (batchContext.getRowdataTypePhysicalVariations().length > firstValueColumnOffset)
            ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), 0, firstValueColumnOffset)
            : batchContext.getRowdataTypePhysicalVariations(),
        /* useExternalBuffer */ true,
        binarySortableSerDe.getSortOrders(),
        binarySortableSerDe.getNullMarkers(),
        binarySortableSerDe.getNotNullMarkers()));
keyBinarySortableDeserializeToRow.init(0);
final int valuesSize = valueStructInspector.getAllStructFieldRefs().size();
if (valuesSize > 0) {
valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(
    new LazyBinaryDeserializeRead(
        VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspector),
        (batchContext.getRowdataTypePhysicalVariations().length >= totalColumns)
            ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), firstValueColumnOffset, totalColumns)
            : null,
        /* useExternalBuffer */ true));
valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
// Create data buffers for value bytes column vectors.
for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
ColumnVector colVector = batch.cols[i];
if (colVector instanceof BytesColumnVector) {
BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
bytesColumnVector.initBuffer();
}
}
}
} else {
ois.add(keyObjectInspector);
ois.add(valueObjectInspector[tag]);
// reducer.setGroupKeyObjectInspector(keyObjectInspector);
rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
ExecMapperContext execContext = new ExecMapperContext(job);
localWork = gWork.getMapRedLocalWork();
execContext.setJc(jc);
execContext.setLocalWork(localWork);
reducer.passExecContext(execContext);
reducer.setReporter(rp);
OperatorUtils.setChildrenCollector(Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);
// initialize reduce operator tree
try {
LOG.info(reducer.dump(0));
reducer.initialize(jc, rowObjectInspector);
if (localWork != null) {
for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
dummyOp.setExecContext(execContext);
dummyOp.initialize(jc, null);
}
}
} catch (Throwable e) {
abort = true;
if (e instanceof OutOfMemoryError) {
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else {
throw new RuntimeException("Reduce operator initialization failed", e);
}
}
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
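The two VectorDeserializeRow helpers built here are applied later, once per reduce record, to fill the key and value columns of the vectorized batch straight from the serialized bytes. A hedged sketch of that per-record step, assuming keyBytes/keyLength and valueBytes/valueLength have been extracted from the current key and value writables (those names are illustrative, not fields of this handler):
int rowIndex = batch.size;
// Deserialize the BinarySortable key columns into the current row...
keyBinarySortableDeserializeToRow.setBytes(keyBytes, 0, keyLength);
keyBinarySortableDeserializeToRow.deserialize(batch, rowIndex);
// ...then the LazyBinary value columns, starting at firstValueColumnOffset.
valueLazyBinaryDeserializeToRow.setBytes(valueBytes, 0, valueLength);
valueLazyBinaryDeserializeToRow.deserialize(batch, rowIndex);
batch.size++;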
Use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.
From the class CheckFastRowHashMap, method verifyHashMapRowsMore.
public static void verifyHashMapRowsMore(List<Object[]> rows, int[] actualToValueMap, VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos, int clipIndex, boolean useExactBytes) throws IOException {
String debugExceptionMessage = null;
StackTraceElement[] debugStackTrace = null;
final int count = rows.size();
final int columnCount = typeInfos.length;
WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
for (int a = 0; a < count; a++) {
int valueIndex = actualToValueMap[a];
Object[] row = rows.get(valueIndex);
byte[] bytes = ref.getBytes();
int offset = (int) ref.getOffset();
int length = ref.getLength();
if (a == clipIndex) {
length--;
}
if (useExactBytes) {
// Use exact byte array which might generate array out of bounds...
bytes = Arrays.copyOfRange(bytes, offset, offset + length);
offset = 0;
}
LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
lazyBinaryDeserializeRead.set(bytes, offset, length);
boolean thrown = false;
Exception saveException = null;
int index = 0;
try {
for (index = 0; index < columnCount; index++) {
verifyRead(lazyBinaryDeserializeRead, typeInfos[index], row[index]);
}
} catch (Exception e) {
thrown = true;
saveException = e;
lazyBinaryDeserializeRead.getDetailedReadPositionString();
hashMapResult.getDetailedHashMapResultPositionString();
debugExceptionMessage = saveException.getMessage();
debugStackTrace = saveException.getStackTrace();
}
if (a == clipIndex) {
if (!thrown) {
TestCase.fail("Expecting an exception to be thrown for the clipped case...");
} else {
TestCase.assertTrue(saveException != null);
if (saveException instanceof EOFException) {
// This is the one we are expecting.
} else if (saveException instanceof ArrayIndexOutOfBoundsException) {
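// Also acceptable: with useExactBytes the copied array is clipped to the shortened length,
// so running off the end surfaces as ArrayIndexOutOfBoundsException instead of EOFException.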
} else {
TestCase.fail("Expecting an EOFException to be thrown for the clipped case...");
}
}
} else {
if (thrown) {
TestCase.fail("Not expecting an exception to be thrown for the non-clipped case... " + " exception message " + debugExceptionMessage + " stack trace " + getStackTraceAsSingleLine(debugStackTrace));
}
TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
}
ref = hashMapResult.next();
if (a == count - 1) {
TestCase.assertTrue(ref == null);
} else {
TestCase.assertTrue(ref != null);
}
}
}
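The verifyRead helper used above is not shown; it compares each deserialized field against the expected value from the original row, checking the null indication returned by readNextField() and, for a non-null primitive, the corresponding current* member of the DeserializeRead. The following is a minimal sketch of that idea for an int column; the method name and the IntWritable cast are illustrative assumptions, not the test's actual helper.
static void verifyIntField(LazyBinaryDeserializeRead read, Object expected) throws IOException {
  if (!read.readNextField()) {
    // A null field must correspond to a null expected value.
    TestCase.assertTrue(expected == null);
  } else {
    // Compare the primitive value (assuming the expected object is an IntWritable).
    TestCase.assertEquals(((IntWritable) expected).get(), read.currentInt);
  }
}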