Example 41 with OutOfMemoryException

Use of org.apache.drill.exec.exception.OutOfMemoryException in project drill by apache.

The class HashJoinBatch, method executeBuildPhase.

/**
 * Execute the BUILD phase: read the incoming build side and split its rows
 * into partitions, spilling some partitions to disk if memory runs low.
 *
 * @return an {@link org.apache.drill.exec.record.RecordBatch.IterOutcome}
 *         if a termination condition is reached, otherwise null
 * @throws SchemaChangeException if the build side schema changes between batches
 */
public IterOutcome executeBuildPhase() throws SchemaChangeException {
    if (buildSideIsEmpty.booleanValue()) {
        // empty right
        return null;
    }
    if (skipHashTableBuild) {
        // No hash table needed - then consume all the
        // right upstream
        killAndDrainRightUpstream();
        return null;
    }
    HashJoinMemoryCalculator.BuildSidePartitioning buildCalc;
    {
        // Initializing build calculator
        // Limit scope of these variables to this block
        int maxBatchSize = spilledState.isFirstCycle() ? RecordBatch.MAX_BATCH_ROW_COUNT : RECORDS_PER_BATCH;
        boolean doMemoryCalculation = canSpill && !probeSideIsEmpty.booleanValue();
        HashJoinMemoryCalculator calc = getCalculatorImpl();
        calc.initialize(doMemoryCalculation);
        buildCalc = calc.next();
        buildCalc.initialize(spilledState.isFirstCycle(),
            true, // TODO Fix after ... fixed
            buildBatch, probeBatch, buildJoinColumns, probeSideIsEmpty.booleanValue(),
            allocator.getLimit(), numPartitions, RECORDS_PER_BATCH, RECORDS_PER_BATCH,
            maxBatchSize, maxBatchSize, batchMemoryManager.getOutputBatchSize(),
            HashTable.DEFAULT_LOAD_FACTOR);
        if (spilledState.isFirstCycle() && doMemoryCalculation) {
            // Do auto tuning
            buildCalc = partitionNumTuning(maxBatchSize, buildCalc);
        }
    }
    if (spilledState.isFirstCycle()) {
        // Do initial setup only on the first cycle
        delayedSetup();
    }
    initializeBuild();
    initializeRuntimeFilter();
    // Make the calculator aware of our partitions
    HashJoinMemoryCalculator.PartitionStatSet partitionStatSet = new HashJoinMemoryCalculator.PartitionStatSet(partitions);
    buildCalc.setPartitionStatSet(partitionStatSet);
    boolean moreData = true;
    while (moreData) {
        switch(rightUpstream) {
            case NONE:
            case NOT_YET:
                moreData = false;
                continue;
            case OK_NEW_SCHEMA:
                if (!buildSchema.equals(buildBatch.getSchema())) {
                    throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in build side.", buildSchema, buildBatch.getSchema());
                }
                for (HashPartition partn : partitions) {
                    partn.updateBatches();
                }
            // Fall through
            case OK:
                batchMemoryManager.update(buildBatch, RIGHT_INDEX, 0, true);
                int currentRecordCount = buildBatch.getRecordCount();
                // create runtime filter
                if (spilledState.isFirstCycle() && enableRuntimeFilter) {
                    // create runtime filter and send out async
                    for (BloomFilter bloomFilter : bloomFilter2buildId.keySet()) {
                        int fieldId = bloomFilter2buildId.get(bloomFilter);
                        for (int ind = 0; ind < currentRecordCount; ind++) {
                            long hashCode = hash64.hash64Code(ind, 0, fieldId);
                            bloomFilter.insert(hashCode);
                        }
                    }
                }
                // Single partition: append the whole batch, keeping the incoming vectors as they are (no row copy)
                if (numPartitions == 1) {
                    partitions[0].appendBatch(buildBatch);
                    break;
                }
                if (!spilledState.isFirstCycle()) {
                    read_right_HV_vector = (IntVector) buildBatch.getContainer().getLast();
                }
                // For each record, get the hash code: compute it on the first cycle,
                // otherwise read it from the HV column appended on a previous cycle
                for (int ind = 0; ind < currentRecordCount; ind++) {
                    int hashCode = spilledState.isFirstCycle()
                            ? partitions[0].getBuildHashCode(ind)
                            : read_right_HV_vector.getAccessor().get(ind);
                    int currPart = hashCode & spilledState.getPartitionMask();
                    hashCode >>>= spilledState.getBitsInMask();
                    // semi-join skips join-key-duplicate rows
                    if (semiJoin) {
                    }
                    // Append the new inner row to the appropriate partition; spill (that
                    // partition) if needed
                    partitions[currPart].appendInnerRow(buildBatch.getContainer(), ind, hashCode, buildCalc);
                }
                if (read_right_HV_vector != null) {
                    read_right_HV_vector.clear();
                    read_right_HV_vector = null;
                }
                break;
            default:
                throw new IllegalStateException(rightUpstream.name());
        }
        // Get the next incoming record batch
        rightUpstream = next(HashJoinHelper.RIGHT_INPUT, buildBatch);
    }
    if (spilledState.isFirstCycle() && enableRuntimeFilter) {
        if (bloomFilter2buildId.size() > 0) {
            int hashJoinOpId = this.popConfig.getOperatorId();
            runtimeFilterReporter.sendOut(bloomFilters, probeFields, this.popConfig.getRuntimeFilterDef(), hashJoinOpId);
        }
    }
    // Complete the current (last) inner batch of each partition; if the partition
    // was spilled, the batch goes to its spill file (see the spilled partitions list below)
    if (numPartitions > 1) {
        // a single partition needs no completion
        for (HashPartition partn : partitions) {
            partn.completeAnInnerBatch(false, partn.isSpilled());
        }
    }
    prefetchFirstProbeBatch();
    if (leftUpstream.isError()) {
        // We need to terminate.
        return leftUpstream;
    }
    HashJoinMemoryCalculator.PostBuildCalculations postBuildCalc = buildCalc.next();
    postBuildCalc.initialize(probeSideIsEmpty.booleanValue()); // probeEmpty
    for (int index = 0; index < partitions.length; index++) {
        HashPartition partn = partitions[index];
        if (partn.isSpilled()) {
            // Don't build hash tables for spilled partitions
            continue;
        }
        try {
            if (postBuildCalc.shouldSpill()) {
                // Spill this partition if we need to make room
                partn.spillThisPartition();
            } else {
                // Only build hash tables for partitions that are not spilled
                partn.buildContainersHashTableAndHelper();
            }
        } catch (OutOfMemoryException e) {
            String message = "Failed building hash table on partition " + index + ":\n" + makeDebugString() + "\n" + postBuildCalc.makeDebugString();
            // Include debug info
            throw new OutOfMemoryException(message, e);
        }
    }
    if (logger.isDebugEnabled()) {
        logger.debug(postBuildCalc.makeDebugString());
    }
    for (HashPartition partn : partitions) {
        if (partn.isSpilled()) {
            HashJoinSpilledPartition sp = new HashJoinSpilledPartition(spilledState.getCycle(), partn.getPartitionNum(), originalPartition, partn.getPartitionBatchesCount(), partn.getSpillFile());
            spilledState.addPartition(sp);
            // for the outer to find the SP later
            spilledInners[partn.getPartitionNum()] = sp;
            partn.closeWriter();
            partn.updateProbeRecordsPerBatch(postBuildCalc.getProbeRecordsPerBatch());
        }
    }
    return null;
}
Also used : BloomFilter(org.apache.drill.exec.work.filter.BloomFilter) HashPartition(org.apache.drill.exec.physical.impl.common.HashPartition) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException)
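
The per-partition error handling at the end of the method (catch, annotate, rethrow with the original cause) can be isolated into a small sketch. This is a hypothetical, self-contained illustration rather than Drill code: buildPartition() and debugString() are made-up stand-ins for partn.buildContainersHashTableAndHelper() and the makeDebugString() calls.

import org.apache.drill.exec.exception.OutOfMemoryException;

public class PartitionBuildSketch {

    // Stand-in for partn.buildContainersHashTableAndHelper(); always fails here
    // so the wrapping path is exercised.
    static void buildPartition(int index) {
        throw new OutOfMemoryException("simulated allocation failure in partition " + index);
    }

    // Stand-in for makeDebugString() / postBuildCalc.makeDebugString().
    static String debugString() {
        return "allocator/partition state would be reported here";
    }

    public static void main(String[] args) {
        for (int index = 0; index < 4; index++) {
            try {
                buildPartition(index);
            } catch (OutOfMemoryException e) {
                // Same pattern as executeBuildPhase(): prepend context about which
                // partition failed and keep the original exception as the cause.
                throw new OutOfMemoryException(
                        "Failed building hash table on partition " + index + ":\n" + debugString(), e);
            }
        }
    }
}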

Example 42 with OutOfMemoryException

Use of org.apache.drill.exec.exception.OutOfMemoryException in project drill by apache.

The class IndirectRowSet, method makeSv2.

private static SelectionVector2 makeSv2(BufferAllocator allocator, VectorContainer container, Set<Integer> skipIndices) {
    int rowCount = container.getRecordCount() - skipIndices.size();
    SelectionVector2 sv2 = new SelectionVector2(allocator);
    if (!sv2.allocateNewSafe(rowCount)) {
        throw new OutOfMemoryException("Unable to allocate sv2 buffer");
    }
    for (int srcIndex = 0, destIndex = 0; srcIndex < container.getRecordCount(); srcIndex++) {
        if (skipIndices.contains(srcIndex)) {
            continue;
        }
        sv2.setIndex(destIndex, (char) srcIndex);
        destIndex++;
    }
    sv2.setRecordCount(rowCount);
    sv2.setBatchActualRecordCount(container.getRecordCount());
    container.buildSchema(SelectionVectorMode.TWO_BYTE);
    return sv2;
}
Also used : SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException)
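
The compaction loop above is the interesting part: destIndex advances only for rows that are kept, so the selection vector stays dense while the source indexes keep their gaps. Below is a minimal sketch of the same idea over a plain char array; the record count and skip set are invented for illustration, and no Drill allocator is involved.

import java.util.Set;
import java.util.TreeSet;

public class Sv2CompactionSketch {
    public static void main(String[] args) {
        int recordCount = 8;                    // stands in for container.getRecordCount()
        Set<Integer> skipIndices = new TreeSet<>();
        skipIndices.add(2);
        skipIndices.add(5);

        // destIndex only advances for surviving rows, so the "selection vector"
        // ends up dense even though the source indexes have gaps.
        char[] sv2 = new char[recordCount - skipIndices.size()];
        int destIndex = 0;
        for (int srcIndex = 0; srcIndex < recordCount; srcIndex++) {
            if (skipIndices.contains(srcIndex)) {
                continue;
            }
            sv2[destIndex++] = (char) srcIndex;
        }

        for (char c : sv2) {
            System.out.print((int) c + " ");    // prints: 0 1 3 4 6 7
        }
        System.out.println();
    }
}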

Example 43 with OutOfMemoryException

Use of org.apache.drill.exec.exception.OutOfMemoryException in project drill by apache.

The class HashPartition, method buildContainersHashTableAndHelper.

/**
 * Creates the hash table and join helper for this partition.
 * This method should only be called after all the build side records
 * have been consumed.
 */
public void buildContainersHashTableAndHelper() throws SchemaChangeException {
    // no building for spilled partitions
    if (isSpilled) {
        return;
    }
    containers = new ArrayList<>();
    hashTable.updateInitialCapacity((int) getNumInMemoryRecords());
    for (int curr = 0; curr < partitionBatchesCount; curr++) {
        VectorContainer nextBatch = tmpBatchesList.get(curr);
        final int currentRecordCount = nextBatch.getRecordCount();
        // For every incoming build batch, we create a matching helper batch
        if (!semiJoin) {
            hjHelper.addNewBatch(currentRecordCount);
        }
        // Holder contains the global index where the key is hashed into using the hash table
        final IndexPointer htIndex = new IndexPointer();
        assert nextBatch != null;
        assert probeBatch != null;
        hashTable.updateIncoming(nextBatch, probeBatch);
        IntVector HV_vector = (IntVector) nextBatch.getLast();
        for (int recInd = 0; recInd < currentRecordCount; recInd++) {
            int hashCode = HV_vector.getAccessor().get(recInd);
            try {
                hashTable.put(recInd, htIndex, hashCode, BATCH_SIZE);
            } catch (RetryAfterSpillException RE) {
                throw new OutOfMemoryException("HT put");
            }
            /*
             * Use the global index returned by the hash table to store the
             * current record index and batch index. This will be used later
             * when we probe and find a match.
             */
            if (!semiJoin) {
                hjHelper.setCurrentIndex(htIndex.value, curr /* buildBatchIndex */, recInd);
            }
        }
        containers.add(nextBatch);
    }
    // the inner is whole in memory, no need for an outer batch
    outerBatchAllocNotNeeded = true;
}
Also used : IntVector(org.apache.drill.exec.vector.IntVector) RetryAfterSpillException(org.apache.drill.common.exceptions.RetryAfterSpillException) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) VectorContainer(org.apache.drill.exec.record.VectorContainer)
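
The catch block above converts a RetryAfterSpillException into an OutOfMemoryException because, by the time the hash table is being built, the build side is final and spilling is no longer an option. A hypothetical standalone sketch of that translation follows; putIntoHashTable() is a made-up stand-in for hashTable.put(...), and the no-argument RetryAfterSpillException constructor is assumed.

import org.apache.drill.common.exceptions.RetryAfterSpillException;
import org.apache.drill.exec.exception.OutOfMemoryException;

public class RetryToOomSketch {

    // Stand-in for hashTable.put(recInd, htIndex, hashCode, BATCH_SIZE); here it
    // always signals that the caller should spill and retry.
    static void putIntoHashTable(int recordIndex) throws RetryAfterSpillException {
        throw new RetryAfterSpillException();
    }

    public static void main(String[] args) {
        try {
            putIntoHashTable(0);
        } catch (RetryAfterSpillException e) {
            // Spilling is no longer possible at this stage, so the retry signal
            // is surfaced as an out-of-memory failure, as in the method above.
            throw new OutOfMemoryException("HT put");
        }
    }
}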

Example 44 with OutOfMemoryException

Use of org.apache.drill.exec.exception.OutOfMemoryException in project drill by apache.

The class BufferedBatches, method newSV2.

/**
 * Allocate and initialize the selection vector used as the sort index.
 * Assumes that memory is available for the vector since memory management
 * ensured space is available.
 *
 * @return a new, populated selection vector 2
 */
private SelectionVector2 newSV2(VectorAccessible incoming) {
    SelectionVector2 sv2 = new SelectionVector2(context.getAllocator());
    if (!sv2.allocateNewSafe(incoming.getRecordCount())) {
        throw UserException.resourceError(new OutOfMemoryException("Unable to allocate sv2 buffer")).build(logger);
    }
    for (int i = 0; i < incoming.getRecordCount(); i++) {
        sv2.setIndex(i, (char) i);
    }
    sv2.setRecordCount(incoming.getRecordCount());
    return sv2;
}
Also used : SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException)
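
Compared with Example 42, the same failed allocateNewSafe() check is wrapped here in a UserException resource error, so the failure reaches the user as a categorized resource problem rather than a bare runtime exception. A minimal sketch of that reporting pattern, with the allocation outcome faked by a boolean:

import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ResourceErrorSketch {
    private static final Logger logger = LoggerFactory.getLogger(ResourceErrorSketch.class);

    public static void main(String[] args) {
        boolean allocated = false;  // stands in for sv2.allocateNewSafe(recordCount)
        if (!allocated) {
            // Same pattern as newSV2(): wrap the OOM in a resource error and log it.
            throw UserException
                    .resourceError(new OutOfMemoryException("Unable to allocate sv2 buffer"))
                    .build(logger);
        }
    }
}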

Aggregations

OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException) 44
DrillBuf (io.netty.buffer.DrillBuf) 12
SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2) 10
Test (org.junit.Test) 10
IOException (java.io.IOException) 9
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 8
ByteBuf (io.netty.buffer.ByteBuf) 6
BufferAllocator (org.apache.drill.exec.memory.BufferAllocator) 6
LogFixture (org.apache.drill.test.LogFixture) 6
LogFixtureBuilder (org.apache.drill.test.LogFixture.LogFixtureBuilder) 6
SubOperatorTest (org.apache.drill.test.SubOperatorTest) 6
MemoryTest (org.apache.drill.categories.MemoryTest) 4
RetryAfterSpillException (org.apache.drill.common.exceptions.RetryAfterSpillException) 4
Accountant (org.apache.drill.exec.memory.Accountant) 4
RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData) 3
DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) 3
ValueVector (org.apache.drill.exec.vector.ValueVector) 3
Stopwatch (com.google.common.base.Stopwatch) 2
CompositeByteBuf (io.netty.buffer.CompositeByteBuf) 2
CorruptedFrameException (io.netty.handler.codec.CorruptedFrameException) 2