Use of org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef in project hive by apache.
Class VectorMapJoinGenerateResultOperator, method generateHashMapResultSingleValue.
// ------------------------------------------------------------------------------------------------
/*
 * Common code for generating join results from hash maps, used by inner and outer joins.
 */
/**
 * Generate join results for a single small table value match.
 *
 * @param batch
 *          The big table batch.
 * @param hashMapResult
 *          The hash map result for the matching key.
 * @param allMatchs
 *          The selection array containing the (physical) batch indices of all matched rows.
 * @param allMatchesIndex
 *          Index into allMatchs of the matching key we are generating for.
 * @param duplicateCount
 *          Number of rows with an equal key.
 * @param numSel
 *          Current number of rows remaining in the big table for forwarding.
 * @return
 *          The new count of selected rows.
 */
protected int generateHashMapResultSingleValue(VectorizedRowBatch batch,
    VectorMapJoinHashMapResult hashMapResult, int[] allMatchs, int allMatchesIndex,
    int duplicateCount, int numSel) throws HiveException, IOException {

  // Read the single value.
  ByteSegmentRef byteSegmentRef = hashMapResult.first();

  for (int i = 0; i < duplicateCount; i++) {
    int batchIndex = allMatchs[allMatchesIndex + i];

    if (bigTableVectorCopyOuterKeys != null) {
      // Copy within the row.
      bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex);
    }

    if (smallTableVectorDeserializeRow != null) {
      doSmallTableDeserializeRow(batch, batchIndex, byteSegmentRef, hashMapResult);
    }

    // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, "generateHashMapResultSingleValue big table");

    // Use the big table row as output.
    batch.selected[numSel++] = batchIndex;
  }
  return numSel;
}
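The single-value path never copies big table rows: it only records matching batch indices in the batch's selected array. A minimal, self-contained sketch of that selected-array mechanism, with made-up data; only the VectorizedRowBatch and LongColumnVector fields used above are assumed:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class SelectedArraySketch {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(1);
    LongColumnVector col = new LongColumnVector();
    batch.cols[0] = col;
    for (int i = 0; i < 10; i++) {
      col.vector[i] = i * 100L;
    }
    batch.size = 10;

    // Suppose rows 2, 5 and 7 matched the join key: select them in place,
    // the way generateHashMapResultSingleValue fills batch.selected.
    int numSel = 0;
    batch.selected[numSel++] = 2;
    batch.selected[numSel++] = 5;
    batch.selected[numSel++] = 7;
    batch.size = numSel;
    batch.selectedInUse = true;

    // Downstream operators read only the selected rows.
    for (int i = 0; i < batch.size; i++) {
      System.out.println(col.vector[batch.selected[i]]);  // 200, 500, 700
    }
  }
}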
Use of org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef in project hive by apache.
Class VectorMapJoinGenerateResultOperator, method generateHashMapResultLargeMultiValue.
/**
 * Generate optimized results for a large N x M cross product using the repeated
 * vectorized row batch optimization.
 *
 * @param batch
 *          The big table batch.
 * @param hashMapResult
 *          The hash map result for the matching key.
 * @param allMatchs
 *          The all-match selection array that contains (physical) batch indices.
 * @param allMatchesIndex
 *          Index into allMatchs of the matching key we are generating for.
 * @param duplicateCount
 *          Number of rows with an equal key.
 */
private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch,
    VectorMapJoinHashMapResult hashMapResult, int[] allMatchs, int allMatchesIndex,
    int duplicateCount) throws HiveException, IOException {

  // Kick out previous overflow batch results.
  if (overflowBatch.size > 0) {
    forwardOverflow();
  }

  ByteSegmentRef byteSegmentRef = hashMapResult.first();
  while (byteSegmentRef != null) {

    // Fill up as much of the overflow batch as possible with small table values.
    while (byteSegmentRef != null) {
      if (smallTableVectorDeserializeRow != null) {
        doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult);
      }
      overflowBatch.size++;
      if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) {
        break;
      }
      byteSegmentRef = hashMapResult.next();
    }

    for (int i = 0; i < duplicateCount; i++) {
      int batchIndex = allMatchs[allMatchesIndex + i];

      if (bigTableRetainedVectorCopy != null) {
        // The one big table row's values repeat.
        bigTableRetainedVectorCopy.copyByReference(batch, batchIndex, overflowBatch, 0);
        for (int column : bigTableRetainedMapping.getOutputColumns()) {
          overflowBatch.cols[column].isRepeating = true;
        }
      }

      // Crucial here that we don't reset the overflow batch, or we will lose the small table
      // values we put in above.
      forwardOverflowNoReset();

      // Hand-reset the big table columns.
      for (int column : bigTableRetainedMapping.getOutputColumns()) {
        ColumnVector colVector = overflowBatch.cols[column];
        colVector.reset();
      }
    }

    byteSegmentRef = hashMapResult.next();
    if (byteSegmentRef == null) {
      break;
    }

    // Get ready for another round of small table values.
    overflowBatch.reset();
  }

  // Clear away any residue from our optimizations.
  overflowBatch.reset();
}
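The key trick in this path is ColumnVector.isRepeating: after copying the single big table row into slot 0 of the overflow batch, marking its columns as repeating makes that one entry stand for every row in the batch. A minimal sketch of the convention; the reader-side loop is illustrative, and only LongColumnVector's vector and isRepeating fields are assumed:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class RepeatingColumnSketch {
  public static void main(String[] args) {
    LongColumnVector col = new LongColumnVector();

    // One write stands in for every row of the batch.
    col.vector[0] = 42L;
    col.isRepeating = true;

    // Readers of a repeating column always consult index 0.
    int batchSize = 1024;
    long sum = 0;
    for (int i = 0; i < batchSize; i++) {
      sum += col.isRepeating ? col.vector[0] : col.vector[i];
    }
    System.out.println(sum);  // 43008 = 42 * 1024
  }
}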
Use of org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef in project hive by apache.
Class VectorMapJoinGenerateResultOperator, method doSmallTableDeserializeRow.
protected void doSmallTableDeserializeRow(VectorizedRowBatch batch, int batchIndex,
    ByteSegmentRef byteSegmentRef, VectorMapJoinHashMapResult hashMapResult)
    throws HiveException {

  byte[] bytes = byteSegmentRef.getBytes();
  int offset = (int) byteSegmentRef.getOffset();
  int length = byteSegmentRef.getLength();
  smallTableVectorDeserializeRow.setBytes(bytes, offset, length);

  try {
    // Our hash tables are immutable, so we can safely deserialize STRING,
    // CHAR/VARCHAR, etc. by reference.
    smallTableVectorDeserializeRow.deserializeByRef(batch, batchIndex);
  } catch (Exception e) {
    throw new HiveException(
        "\nHashMapResult detail: " + hashMapResult.getDetailedHashMapResultPositionString() +
        "\nDeserializeRead detail: " + smallTableVectorDeserializeRow.getDetailedReadPositionString(),
        e);
  }
}
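Note that doSmallTableDeserializeRow never copies the value: a ByteSegmentRef is just a (bytes, offset, length) window into the hash table's shared write buffers, and deserializeByRef reads directly out of it. A minimal sketch of that windowing with a hypothetical backing array; the offset and length values are made up:

import java.nio.charset.StandardCharsets;

public class ByteSegmentSketch {
  public static void main(String[] args) {
    byte[] bytes = "keyvaluetail".getBytes(StandardCharsets.UTF_8);  // shared backing buffer
    int offset = 3;   // what byteSegmentRef.getOffset() would return (cast to int)
    int length = 5;   // what byteSegmentRef.getLength() would return

    // No copy: the reader consumes the segment directly from the shared
    // buffer, which is safe because the hash table is immutable while probing.
    String segment = new String(bytes, offset, length, StandardCharsets.UTF_8);
    System.out.println(segment);  // "value"
  }
}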
Use of org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef in project hive by apache.
Class VectorMapJoinGenerateResultOperator, method generateHashMapResultMultiValue.
/**
 * Generate results for an N x M cross product.
 *
 * @param batch
 *          The big table batch.
 * @param hashMapResult
 *          The hash map result for the matching key.
 * @param allMatchs
 *          The all-match selection array that contains (physical) batch indices.
 * @param allMatchesIndex
 *          Index into allMatchs of the matching key we are generating for.
 * @param duplicateCount
 *          Number of rows with an equal key.
 */
protected void generateHashMapResultMultiValue(VectorizedRowBatch batch,
    VectorMapJoinHashMapResult hashMapResult, int[] allMatchs, int allMatchesIndex,
    int duplicateCount) throws HiveException, IOException {

  if (useOverflowRepeatedThreshold &&
      hashMapResult.isCappedCountAvailable() &&
      hashMapResult.cappedCount() > overflowRepeatedThreshold) {

    // Large cross product: use the repeating vectorized row batch optimization
    // in the overflow batch.
    generateHashMapResultLargeMultiValue(batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount);
    return;
  }

  for (int i = 0; i < duplicateCount; i++) {
    int batchIndex = allMatchs[allMatchesIndex + i];

    ByteSegmentRef byteSegmentRef = hashMapResult.first();
    while (byteSegmentRef != null) {

      // Note this includes any outer join keys that need to go into the small table "area".
      if (bigTableRetainedVectorCopy != null) {
        bigTableRetainedVectorCopy.copyByValue(batch, batchIndex, overflowBatch, overflowBatch.size);
      }

      if (smallTableVectorDeserializeRow != null) {
        doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult);
      }

      // VectorizedBatchUtil.debugDisplayOneRow(overflowBatch, overflowBatch.size, "generateHashMapResultMultiValue overflow");

      overflowBatch.size++;
      if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) {
        forwardOverflow();
      }
      byteSegmentRef = hashMapResult.next();
    }
  }
}
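The threshold dispatch is a cost tradeoff: the simple path copies the big table row by value once per output row (roughly N x M copies for N duplicates and M small table values), while the large path deserializes each small table value once per 1024-row overflow batch chunk and forwards that chunk N times with the big table columns marked repeating. A back-of-the-envelope sketch of that arithmetic, with made-up counts; the batch size of 1024 matches VectorizedRowBatch.DEFAULT_SIZE:

public class CrossProductCostSketch {
  public static void main(String[] args) {
    int n = 4;             // duplicateCount: big table rows sharing the key
    int m = 5000;          // small table values for the key
    int batchSize = 1024;  // VectorizedRowBatch.DEFAULT_SIZE

    // Simple path: one by-value big table row copy per output row.
    long simplePathCopies = (long) n * m;

    // Large path: each chunk of up to 1024 small table values is deserialized
    // once, then the overflow batch is forwarded n times without reset.
    long chunks = (m + batchSize - 1) / batchSize;
    long largePathDeserializations = m;     // each small table value, once
    long largePathForwards = chunks * n;    // forwarded overflow batches

    System.out.println("simple path row copies:      " + simplePathCopies);          // 20000
    System.out.println("large path deserializations: " + largePathDeserializations); // 5000
    System.out.println("large path batch forwards:   " + largePathForwards);         // 20
  }
}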