use of org.apache.drill.exec.exception.OutOfMemoryException in project drill by apache.
the class HashJoinBatch method executeBuildPhase.
/**
* Execute the BUILD phase; first read incoming and split rows into
* partitions; may decide to spill some of the partitions
*
* @return Returns an
* {@link org.apache.drill.exec.record.RecordBatch.IterOutcome} if a
* termination condition is reached. Otherwise returns null.
* @throws SchemaChangeException
*/
public IterOutcome executeBuildPhase() throws SchemaChangeException {
if (buildSideIsEmpty.booleanValue()) {
// empty right
return null;
}
if (skipHashTableBuild) {
// No hash table needed - then consume all the
// right upstream
killAndDrainRightUpstream();
return null;
}
HashJoinMemoryCalculator.BuildSidePartitioning buildCalc;
{
// Initializing build calculator
// Limit scope of these variables to this block
int maxBatchSize = spilledState.isFirstCycle() ? RecordBatch.MAX_BATCH_ROW_COUNT : RECORDS_PER_BATCH;
boolean doMemoryCalculation = canSpill && !probeSideIsEmpty.booleanValue();
HashJoinMemoryCalculator calc = getCalculatorImpl();
calc.initialize(doMemoryCalculation);
buildCalc = calc.next();
// TODO Fix after
buildCalc.initialize(spilledState.isFirstCycle(),
    true, // fixed
    buildBatch, probeBatch, buildJoinColumns, probeSideIsEmpty.booleanValue(),
    allocator.getLimit(), numPartitions, RECORDS_PER_BATCH, RECORDS_PER_BATCH,
    maxBatchSize, maxBatchSize, batchMemoryManager.getOutputBatchSize(),
    HashTable.DEFAULT_LOAD_FACTOR);
if (spilledState.isFirstCycle() && doMemoryCalculation) {
// Do auto tuning
buildCalc = partitionNumTuning(maxBatchSize, buildCalc);
}
}
if (spilledState.isFirstCycle()) {
// Do initial setup only on the first cycle
delayedSetup();
}
initializeBuild();
initializeRuntimeFilter();
// Make the calculator aware of our partitions
HashJoinMemoryCalculator.PartitionStatSet partitionStatSet = new HashJoinMemoryCalculator.PartitionStatSet(partitions);
buildCalc.setPartitionStatSet(partitionStatSet);
boolean moreData = true;
while (moreData) {
switch(rightUpstream) {
case NONE:
case NOT_YET:
moreData = false;
continue;
case OK_NEW_SCHEMA:
if (!buildSchema.equals(buildBatch.getSchema())) {
throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in build side.", buildSchema, buildBatch.getSchema());
}
for (HashPartition partn : partitions) {
partn.updateBatches();
}
// Fall through
case OK:
batchMemoryManager.update(buildBatch, RIGHT_INDEX, 0, true);
int currentRecordCount = buildBatch.getRecordCount();
// create runtime filter
if (spilledState.isFirstCycle() && enableRuntimeFilter) {
// create runtime filter and send out async
for (BloomFilter bloomFilter : bloomFilter2buildId.keySet()) {
int fieldId = bloomFilter2buildId.get(bloomFilter);
for (int ind = 0; ind < currentRecordCount; ind++) {
long hashCode = hash64.hash64Code(ind, 0, fieldId);
bloomFilter.insert(hashCode);
}
}
}
// Single partition: use the incoming vectors as they are (no row copy)
if (numPartitions == 1) {
partitions[0].appendBatch(buildBatch);
break;
}
if (!spilledState.isFirstCycle()) {
read_right_HV_vector = (IntVector) buildBatch.getContainer().getLast();
}
for (int ind = 0; ind < currentRecordCount; ind++) {
// On the first cycle compute the hash; on later cycles read the hash value from the HV column
int hashCode = spilledState.isFirstCycle() ? partitions[0].getBuildHashCode(ind) : read_right_HV_vector.getAccessor().get(ind);
int currPart = hashCode & spilledState.getPartitionMask();
hashCode >>>= spilledState.getBitsInMask();
// semi-join skips join-key-duplicate rows
if (semiJoin) {
}
// Append the new inner row to the appropriate partition; spill (that
// partition) if needed
partitions[currPart].appendInnerRow(buildBatch.getContainer(), ind, hashCode, buildCalc);
}
if (read_right_HV_vector != null) {
read_right_HV_vector.clear();
read_right_HV_vector = null;
}
break;
default:
throw new IllegalStateException(rightUpstream.name());
}
// Get the next incoming record batch
rightUpstream = next(HashJoinHelper.RIGHT_INPUT, buildBatch);
}
if (spilledState.isFirstCycle() && enableRuntimeFilter) {
if (bloomFilter2buildId.size() > 0) {
int hashJoinOpId = this.popConfig.getOperatorId();
runtimeFilterReporter.sendOut(bloomFilters, probeFields, this.popConfig.getRuntimeFilterDef(), hashJoinOpId);
}
}
// Complete the current inner batch in each partition (spilling it if that partition has spilled)
if (numPartitions > 1) { // a single partition needs no completion
for (HashPartition partn : partitions) {
partn.completeAnInnerBatch(false, partn.isSpilled());
}
}
prefetchFirstProbeBatch();
if (leftUpstream.isError()) {
// We need to terminate.
return leftUpstream;
}
HashJoinMemoryCalculator.PostBuildCalculations postBuildCalc = buildCalc.next();
postBuildCalc.initialize(probeSideIsEmpty.booleanValue()); // probeEmpty
for (int index = 0; index < partitions.length; index++) {
HashPartition partn = partitions[index];
if (partn.isSpilled()) {
// Don't build hash tables for spilled partitions
continue;
}
try {
if (postBuildCalc.shouldSpill()) {
// Spill this partition if we need to make room
partn.spillThisPartition();
} else {
// Only build hash tables for partitions that are not spilled
partn.buildContainersHashTableAndHelper();
}
} catch (OutOfMemoryException e) {
String message = "Failed building hash table on partition " + index + ":\n" + makeDebugString() + "\n" + postBuildCalc.makeDebugString();
// Include debug info
throw new OutOfMemoryException(message, e);
}
}
if (logger.isDebugEnabled()) {
logger.debug(postBuildCalc.makeDebugString());
}
for (HashPartition partn : partitions) {
if (partn.isSpilled()) {
HashJoinSpilledPartition sp = new HashJoinSpilledPartition(spilledState.getCycle(), partn.getPartitionNum(), originalPartition, partn.getPartitionBatchesCount(), partn.getSpillFile());
spilledState.addPartition(sp);
spilledInners[partn.getPartitionNum()] = sp; // for the outer to find the SP later
partn.closeWriter();
partn.updateProbeRecordsPerBatch(postBuildCalc.getProbeRecordsPerBatch());
}
}
return null;
}
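The catch block above enriches the allocator's OutOfMemoryException with partition and calculator debug output before rethrowing. Below is a minimal, self-contained sketch of that wrap-and-rethrow pattern; the PartitionWork interface and its methods are hypothetical stand-ins, and only the OutOfMemoryException(String, Throwable) constructor is taken from the snippet above.

import org.apache.drill.exec.exception.OutOfMemoryException;

public class BuildPhaseSketch {

  // Hypothetical stand-in for the per-partition work done by HashPartition
  interface PartitionWork {
    void buildHashTable();   // may throw the (unchecked) OutOfMemoryException
    String debugString();    // e.g. partition sizes and batch counts
  }

  static void buildWithContext(PartitionWork[] partitions) {
    for (int index = 0; index < partitions.length; index++) {
      try {
        partitions[index].buildHashTable();
      } catch (OutOfMemoryException e) {
        // Same idea as executeBuildPhase(): report which partition failed and why
        String message = "Failed building hash table on partition " + index + ":\n"
            + partitions[index].debugString();
        throw new OutOfMemoryException(message, e);
      }
    }
  }
}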
use of org.apache.drill.exec.exception.OutOfMemoryException in project drill by apache.
the class IndirectRowSet method makeSv2.
private static SelectionVector2 makeSv2(BufferAllocator allocator, VectorContainer container, Set<Integer> skipIndices) {
int rowCount = container.getRecordCount() - skipIndices.size();
SelectionVector2 sv2 = new SelectionVector2(allocator);
if (!sv2.allocateNewSafe(rowCount)) {
throw new OutOfMemoryException("Unable to allocate sv2 buffer");
}
for (int srcIndex = 0, destIndex = 0; srcIndex < container.getRecordCount(); srcIndex++) {
if (skipIndices.contains(srcIndex)) {
continue;
}
sv2.setIndex(destIndex, (char) srcIndex);
destIndex++;
}
sv2.setRecordCount(rowCount);
sv2.setBatchActualRecordCount(container.getRecordCount());
container.buildSchema(SelectionVectorMode.TWO_BYTE);
return sv2;
}
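makeSv2 compacts the surviving row indices into a two-byte selection vector, which is why srcIndex is cast to char: an SV2 entry is an unsigned 16-bit offset, which is also why RecordBatch.MAX_BATCH_ROW_COUNT appears as an upper bound in the first snippet. Below is a plain-Java sketch of just that remapping, without the Drill allocator; all names are hypothetical.

import java.util.Set;

final class Sv2RemapSketch {

  // Build the dest -> src index mapping that makeSv2 stores in the SelectionVector2,
  // skipping the "deleted" rows. Each entry fits in an unsigned 16-bit char.
  static char[] remap(int recordCount, Set<Integer> skipIndices) {
    char[] mapping = new char[recordCount - skipIndices.size()];
    int destIndex = 0;
    for (int srcIndex = 0; srcIndex < recordCount; srcIndex++) {
      if (skipIndices.contains(srcIndex)) {
        continue;                               // row is filtered out of the view
      }
      mapping[destIndex++] = (char) srcIndex;   // position in the view -> original row
    }
    return mapping;
  }
}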
use of org.apache.drill.exec.exception.OutOfMemoryException in project drill by apache.
the class HashPartition method buildContainersHashTableAndHelper.
/**
* Creates the hash table and join helper for this partition.
* This method should only be called after all the build side records
* have been consumed.
*/
public void buildContainersHashTableAndHelper() throws SchemaChangeException {
// no building for spilled partitions
if (isSpilled) {
return;
}
containers = new ArrayList<>();
hashTable.updateInitialCapacity((int) getNumInMemoryRecords());
for (int curr = 0; curr < partitionBatchesCount; curr++) {
VectorContainer nextBatch = tmpBatchesList.get(curr);
final int currentRecordCount = nextBatch.getRecordCount();
// For every incoming build batch, we create a matching helper batch
if (!semiJoin) {
hjHelper.addNewBatch(currentRecordCount);
}
// Holder contains the global index where the key is hashed into using the hash table
final IndexPointer htIndex = new IndexPointer();
assert nextBatch != null;
assert probeBatch != null;
hashTable.updateIncoming(nextBatch, probeBatch);
IntVector HV_vector = (IntVector) nextBatch.getLast();
for (int recInd = 0; recInd < currentRecordCount; recInd++) {
int hashCode = HV_vector.getAccessor().get(recInd);
try {
hashTable.put(recInd, htIndex, hashCode, BATCH_SIZE);
} catch (RetryAfterSpillException RE) {
throw new OutOfMemoryException("HT put");
}
/* Use the global index returned by the hash table, to store
* the current record index and batch index. This will be used
* later when we probe and find a match.
*/
if (!semiJoin) {
hjHelper.setCurrentIndex(htIndex.value, curr /* buildBatchIndex */, recInd);
}
}
containers.add(nextBatch);
}
// the inner is whole in memory, no need for an outer batch
outerBatchAllocNotNeeded = true;
}
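The catch block above converts the hash table's retry-after-spill signal into a hard OutOfMemoryException, because at this point the partition is being materialized in memory and can no longer spill. Below is a sketch of that conversion; RetrySignal and the Table interface are hypothetical stand-ins for RetryAfterSpillException and the Drill hash table.

import org.apache.drill.exec.exception.OutOfMemoryException;

final class HashTablePutSketch {

  // Hypothetical stand-in for Drill's RetryAfterSpillException
  static class RetrySignal extends Exception { }

  // Hypothetical facade over the hash table's put()
  interface Table {
    void put(int recordIndex, int hashCode) throws RetrySignal;
  }

  static void putAll(Table table, int[] hashValues) {
    for (int recInd = 0; recInd < hashValues.length; recInd++) {
      try {
        table.put(recInd, hashValues[recInd]);
      } catch (RetrySignal e) {
        // No spill is possible while building the in-memory hash table,
        // so escalate the retry request to an out-of-memory failure
        throw new OutOfMemoryException("HT put", e);
      }
    }
  }
}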
use of org.apache.drill.exec.exception.OutOfMemoryException in project drill by apache.
the class BufferedBatches method newSV2.
/**
* Allocate and initialize the selection vector used as the sort index.
* Assumes that memory is available for the vector since memory management
* ensured space is available.
*
* @return a new, populated selection vector 2
*/
private SelectionVector2 newSV2(VectorAccessible incoming) {
SelectionVector2 sv2 = new SelectionVector2(context.getAllocator());
if (!sv2.allocateNewSafe(incoming.getRecordCount())) {
throw UserException.resourceError(new OutOfMemoryException("Unable to allocate sv2 buffer")).build(logger);
}
for (int i = 0; i < incoming.getRecordCount(); i++) {
sv2.setIndex(i, (char) i);
}
sv2.setRecordCount(incoming.getRecordCount());
return sv2;
}
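Here the allocation failure is reported through UserException.resourceError, so the caller sees a RESOURCE-category error with the OutOfMemoryException preserved as the cause. Below is a minimal sketch of that reporting pattern; apart from the two exception classes and the builder calls visible above, the names are illustrative.

import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

final class Sv2AllocationSketch {
  private static final Logger logger = LoggerFactory.getLogger(Sv2AllocationSketch.class);

  // "allocated" stands in for the result of sv2.allocateNewSafe(recordCount)
  static void checkAllocation(boolean allocated, int recordCount) {
    if (!allocated) {
      // Wrap the allocator failure so it is logged and surfaced as a resource error
      throw UserException
          .resourceError(new OutOfMemoryException("Unable to allocate sv2 buffer"))
          .message("Could not allocate a selection vector for %d records", recordCount)
          .build(logger);
    }
  }
}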