Use of org.apache.drill.exec.physical.impl.common.HashTable in project drill by apache.
The class HashAggTemplate, method checkGroupAndAggrValues.
/**
 * Checks whether the group of the given incoming row is present in the hash table; if not,
 * inserts it in the hash table, then updates the aggregation values for that group.
 *
 * <p>The htIdxHolder contains the index of the group in the hash table container; this same
 * index is also used for the aggregation values maintained by the hash aggregate.
 *
 * <p>When there is no reserve memory, or when the hash table put() signals a
 * {@code RetryAfterSpillException}, a partition is spilled via {@code doSpill()},
 * {@code retrySameIndex} is set, and the method returns without aggregating the row
 * (so that this put() is retried).
 *
 * @param incomingRowIdx index of the incoming row to process; asserted to be non-negative
 * @throws OutOfMemoryException if the hash table put() or the allocation of a new values batch
 *     runs out of memory, or if a spill is required but {@code canSpill} is false
 * @throws UnsupportedOperationException if a {@code SchemaChangeException} is raised while
 *     hashing the row or inserting it in the hash table
 */
private void checkGroupAndAggrValues(int incomingRowIdx) {
  assert incomingRowIdx >= 0;
  assert !earlyOutput;

  // The hash code is computed once, then its lower bits are used to determine the
  // partition to use, and the higher bits determine the location in the hash table.
  int hashCode;
  try {
    hashCode = htables[0].getBuildHashCode(incomingRowIdx);
  } catch (SchemaChangeException e) {
    throw new UnsupportedOperationException("Unexpected schema change", e);
  }

  // right shift hash code for secondary (or tertiary...) spilling
  for (int i = 0; i < spilledState.getCycle(); i++) {
    hashCode >>>= spilledState.getBitsInMask();
  }

  int currentPartition = hashCode & spilledState.getPartitionMask();
  // drop the bits just consumed by the partition selection
  hashCode >>>= spilledState.getBitsInMask();
  HashTable.PutStatus putStatus = null;
  long allocatedBeforeHTput = allocator.getAllocatedMemory();
  String tryingTo = phase.is1st() ? "early return" : "spill";

  // Proactive spill - in case there is no reserve memory - spill and retry putting later
  if (reserveValueBatchMemory == 0 && canSpill) {
    logger.trace("Reserved memory runs short, trying to {} a partition and retry Hash Table put() again.", tryingTo);
    // spill to free some memory
    doSpill(currentPartition);
    // to retry this put()
    retrySameIndex = true;
    return;
  }

  // ==========================================
  // Insert the key into the hash table of the selected partition
  // ==========================================
  try {
    putStatus = htables[currentPartition].put(incomingRowIdx, htIdxHolder, hashCode, getTargetBatchCount());
  } catch (RetryAfterSpillException re) {
    if (!canSpill) {
      // chain the triggering exception so the original failure is not lost
      throw new OutOfMemoryException(getOOMErrorMsg("Can not spill"), re);
    }
    logger.trace("HT put failed with an OOM, trying to {} a partition and retry Hash Table put() again.", tryingTo);
    // for debugging - in case there's a leak
    long memDiff = allocator.getAllocatedMemory() - allocatedBeforeHTput;
    if (memDiff > 0) {
      logger.warn("Leak: HashTable put() OOM left behind {} bytes allocated", memDiff);
    }
    // spill to free some memory
    doSpill(currentPartition);
    // to retry this put()
    retrySameIndex = true;
    return;
  } catch (OutOfMemoryException exc) {
    throw new OutOfMemoryException(getOOMErrorMsg("HT was: " + allocatedBeforeHTput), exc);
  } catch (SchemaChangeException e) {
    throw new UnsupportedOperationException("Unexpected schema change", e);
  }

  long allocatedBeforeAggCol = allocator.getAllocatedMemory();
  // the put() itself grew the allocation -- a first reason to check for a spill
  boolean needToCheckIfSpillIsNeeded = allocatedBeforeAggCol > allocatedBeforeHTput;

  if (putStatus == HashTable.PutStatus.NEW_BATCH_ADDED) {
    try {
      // try to preempt an OOM by using the reserve
      useReservedValuesMemory();
      // allocate a new (internal) values batch
      addBatchHolder(currentPartition, getTargetBatchCount());
      // restore the reserve, if possible
      restoreReservedMemory();
      // A reason to check for a spill - In case restore-reserve failed
      needToCheckIfSpillIsNeeded = (0 == reserveValueBatchMemory);
      // just allocated a planned batch
      if (plannedBatches > 0) {
        plannedBatches--;
      }
      long totalAddedMem = allocator.getAllocatedMemory() - allocatedBeforeHTput;
      long aggValuesAddedMem = allocator.getAllocatedMemory() - allocatedBeforeAggCol;
      logger.trace("MEMORY CHECK AGG: allocated now {}, added {}, total (with HT) added {}", allocator.getAllocatedMemory(), aggValuesAddedMem, totalAddedMem);
      // resize the batch estimates if needed (e.g., varchars may take more memory than estimated)
      if (totalAddedMem > estMaxBatchSize) {
        logger.trace("Adjusting Batch size estimate from {} to {}", estMaxBatchSize, totalAddedMem);
        estMaxBatchSize = totalAddedMem;
        needToCheckIfSpillIsNeeded = true;
      }
      if (aggValuesAddedMem > estValuesBatchSize) {
        logger.trace("Adjusting Values Batch size from {} to {}", estValuesBatchSize, aggValuesAddedMem);
        estValuesBatchSize = aggValuesAddedMem;
        needToCheckIfSpillIsNeeded = true;
      }
    } catch (OutOfMemoryException exc) {
      throw new OutOfMemoryException(getOOMErrorMsg("AGGR"), exc);
    }
  } else if (putStatus == HashTable.PutStatus.KEY_ADDED_LAST) {
    // If a batch just became full (i.e. another batch would be allocated soon) -- then need to
    // check (later, see below) if the memory limits are too close, and if so -- then spill !
    // planning to allocate one more batch
    plannedBatches++;
    needToCheckIfSpillIsNeeded = true;
  }

  // =================================================================
  // Locate the matching aggregate columns and perform the aggregation
  // =================================================================
  int currentIdx = htIdxHolder.value;
  // bits above the low 16 select the batch holder; the bits under BATCH_MASK select the slot in it
  BatchHolder bh = batchHolders[currentPartition].get((currentIdx >>> 16) & BATCH_MASK);
  int idxWithinBatch = currentIdx & BATCH_MASK;
  if (bh.updateAggrValues(incomingRowIdx, idxWithinBatch)) {
    numGroupedRecords++;
  }

  // ===================================================================================
  // Spill check, performed only after the current row has been aggregated
  // ===================================================================================
  if (needToCheckIfSpillIsNeeded && canSpill && useMemoryPrediction) {
    spillIfNeeded(currentPartition);
  }
}
Use of org.apache.drill.exec.physical.impl.common.HashTable in project drill by axbaretto.
The class HashAggTemplate, method checkGroupAndAggrValues.
/**
 * Checks whether the group of the given incoming row is present in the hash table; if not,
 * inserts it in the hash table, then updates the aggregation values for that group.
 *
 * <p>The htIdxHolder contains the index of the group in the hash table container; this same
 * index is also used for the aggregation values maintained by the hash aggregate.
 *
 * <p>When there is no reserve memory, or when the hash table put() signals a
 * {@code RetryAfterSpillException}, a partition is spilled via {@code doSpill()},
 * {@code retrySameIndex} is set, and the method returns without aggregating the row
 * (so that this put() is retried).
 *
 * @param incomingRowIdx index of the incoming row to process; asserted to be non-negative
 * @throws OutOfMemoryException if the hash table put() or the allocation of a new values batch
 *     runs out of memory, or if a spill is required but {@code canSpill} is false
 * @throws UnsupportedOperationException if a {@code SchemaChangeException} is raised while
 *     hashing the row or inserting it in the hash table
 */
private void checkGroupAndAggrValues(int incomingRowIdx) {
  assert incomingRowIdx >= 0;
  assert !earlyOutput;

  // The hash code is computed once, then its lower bits are used to determine the
  // partition to use, and the higher bits determine the location in the hash table.
  int hashCode;
  try {
    hashCode = htables[0].getHashCode(incomingRowIdx);
  } catch (SchemaChangeException e) {
    throw new UnsupportedOperationException("Unexpected schema change", e);
  }

  // right shift hash code for secondary (or tertiary...) spilling
  for (int i = 0; i < cycleNum; i++) {
    hashCode >>>= bitsInMask;
  }

  int currentPartition = hashCode & partitionMask;
  // drop the bits just consumed by the partition selection
  hashCode >>>= bitsInMask;
  HashTable.PutStatus putStatus = null;
  long allocatedBeforeHTput = allocator.getAllocatedMemory();
  // wording shared by the two trace messages below
  String tryingTo = is1stPhase ? "early return" : "spill";

  // Proactive spill - in case there is no reserve memory - spill and retry putting later
  if (reserveValueBatchMemory == 0 && canSpill) {
    logger.trace("Reserved memory runs short, trying to {} a partition and retry Hash Table put() again.", tryingTo);
    // spill to free some memory
    doSpill(currentPartition);
    // to retry this put()
    retrySameIndex = true;
    return;
  }

  // ==========================================
  // Insert the key into the hash table of the selected partition
  // ==========================================
  try {
    putStatus = htables[currentPartition].put(incomingRowIdx, htIdxHolder, hashCode);
  } catch (RetryAfterSpillException re) {
    if (!canSpill) {
      // chain the triggering exception so the original failure is not lost
      throw new OutOfMemoryException(getOOMErrorMsg("Can not spill"), re);
    }
    logger.trace("HT put failed with an OOM, trying to {} a partition and retry Hash Table put() again.", tryingTo);
    // for debugging - in case there's a leak
    long memDiff = allocator.getAllocatedMemory() - allocatedBeforeHTput;
    if (memDiff > 0) {
      logger.warn("Leak: HashTable put() OOM left behind {} bytes allocated", memDiff);
    }
    // spill to free some memory
    doSpill(currentPartition);
    // to retry this put()
    retrySameIndex = true;
    return;
  } catch (OutOfMemoryException exc) {
    throw new OutOfMemoryException(getOOMErrorMsg("HT was: " + allocatedBeforeHTput), exc);
  } catch (SchemaChangeException e) {
    throw new UnsupportedOperationException("Unexpected schema change", e);
  }

  long allocatedBeforeAggCol = allocator.getAllocatedMemory();
  // the put() itself grew the allocation -- a first reason to check for a spill
  boolean needToCheckIfSpillIsNeeded = allocatedBeforeAggCol > allocatedBeforeHTput;

  if (putStatus == HashTable.PutStatus.NEW_BATCH_ADDED) {
    try {
      // try to preempt an OOM by using the reserve
      useReservedValuesMemory();
      // allocate a new (internal) values batch
      addBatchHolder(currentPartition);
      // restore the reserve, if possible
      restoreReservedMemory();
      // A reason to check for a spill - In case restore-reserve failed
      needToCheckIfSpillIsNeeded = (0 == reserveValueBatchMemory);
      // just allocated a planned batch
      if (plannedBatches > 0) {
        plannedBatches--;
      }
      long totalAddedMem = allocator.getAllocatedMemory() - allocatedBeforeHTput;
      long aggValuesAddedMem = allocator.getAllocatedMemory() - allocatedBeforeAggCol;
      logger.trace("MEMORY CHECK AGG: allocated now {}, added {}, total (with HT) added {}", allocator.getAllocatedMemory(), aggValuesAddedMem, totalAddedMem);
      // resize the batch estimates if needed (e.g., varchars may take more memory than estimated)
      if (totalAddedMem > estMaxBatchSize) {
        logger.trace("Adjusting Batch size estimate from {} to {}", estMaxBatchSize, totalAddedMem);
        estMaxBatchSize = totalAddedMem;
        needToCheckIfSpillIsNeeded = true;
      }
      if (aggValuesAddedMem > estValuesBatchSize) {
        logger.trace("Adjusting Values Batch size from {} to {}", estValuesBatchSize, aggValuesAddedMem);
        estValuesBatchSize = aggValuesAddedMem;
        needToCheckIfSpillIsNeeded = true;
      }
    } catch (OutOfMemoryException exc) {
      throw new OutOfMemoryException(getOOMErrorMsg("AGGR"), exc);
    }
  } else if (putStatus == HashTable.PutStatus.KEY_ADDED_LAST) {
    // If a batch just became full (i.e. another batch would be allocated soon) -- then need to
    // check (later, see below) if the memory limits are too close, and if so -- then spill !
    // planning to allocate one more batch
    plannedBatches++;
    needToCheckIfSpillIsNeeded = true;
  }

  // =================================================================
  // Locate the matching aggregate columns and perform the aggregation
  // =================================================================
  int currentIdx = htIdxHolder.value;
  // bits above the low 16 select the batch holder; the bits under BATCH_MASK select the slot in it
  BatchHolder bh = batchHolders[currentPartition].get((currentIdx >>> 16) & HashTable.BATCH_MASK);
  int idxWithinBatch = currentIdx & HashTable.BATCH_MASK;
  if (bh.updateAggrValues(incomingRowIdx, idxWithinBatch)) {
    numGroupedRecords++;
  }

  // ===================================================================================
  // Spill check, performed only after the current row has been aggregated
  // ===================================================================================
  if (needToCheckIfSpillIsNeeded && canSpill && useMemoryPrediction) {
    spillIfNeeded(currentPartition);
  }
}
Aggregations