Examples with HashTable - org.apache.drill.exec.physical.impl.common.HashTable

Example 1 with HashTable

use of org.apache.drill.exec.physical.impl.common.HashTable in project drill by apache.

In class HashAggTemplate, method checkGroupAndAggrValues:

/**
 * Checks whether the group of an incoming row is present in the hash table and, if not,
 * inserts it; then applies the row to that group's aggregation values.
 * <p>
 * The {@code htIdxHolder} contains the index of the group in the hash table container; this
 * same index is also used for the aggregation values maintained by the hash aggregate.
 * <p>
 * If the method spills a partition instead of completing the put, it sets
 * {@code retrySameIndex} and returns without aggregating the row, so that the put is retried.
 *
 * @param incomingRowIdx index of the row in the incoming batch; asserted to be non-negative
 * @throws OutOfMemoryException if the hash table put fails for lack of memory and spilling is
 *         not possible, or if the put or the values-batch allocation itself runs out of memory
 * @throws UnsupportedOperationException if a {@code SchemaChangeException} is raised while
 *         hashing the row or inserting it into the hash table
 */
private void checkGroupAndAggrValues(int incomingRowIdx) {
    assert incomingRowIdx >= 0;
    assert !earlyOutput;
    // The hash code is computed once, then its lower bits are used to determine the
    // partition to use, and the higher bits determine the location in the hash table.
    int hashCode;
    try {
        // htables[0].updateBatches();
        hashCode = htables[0].getBuildHashCode(incomingRowIdx);
    } catch (SchemaChangeException e) {
        throw new UnsupportedOperationException("Unexpected schema change", e);
    }
    // right shift hash code for secondary (or tertiary...) spilling:
    // one shift of getBitsInMask() bits per spill cycle already performed
    for (int i = 0; i < spilledState.getCycle(); i++) {
        hashCode >>>= spilledState.getBitsInMask();
    }
    int currentPartition = hashCode & spilledState.getPartitionMask();
    // drop the bits just used for the partition; the rest locate the entry in the hash table
    hashCode >>>= spilledState.getBitsInMask();
    // Definitely assigned below: every catch branch of the put() either returns or throws.
    HashTable.PutStatus putStatus;
    long allocatedBeforeHTput = allocator.getAllocatedMemory();
    String tryingTo = phase.is1st() ? "early return" : "spill";
    // Proactive spill - in case there is no reserve memory - spill and retry putting later
    if (reserveValueBatchMemory == 0 && canSpill) {
        logger.trace("Reserved memory runs short, trying to {} a partition and retry Hash Table put() again.", tryingTo);
        // spill to free some memory
        doSpill(currentPartition);
        retrySameIndex = true;
        // to retry this put()
        return;
    }
    // ==========================================
    // Insert the row's key into the hash table of its partition
    // ==========================================
    try {
        putStatus = htables[currentPartition].put(incomingRowIdx, htIdxHolder, hashCode, getTargetBatchCount());
    } catch (RetryAfterSpillException re) {
        if (!canSpill) {
            // keep the original failure as the cause so it is not lost from the stack trace
            throw new OutOfMemoryException(getOOMErrorMsg("Can not spill"), re);
        }
        logger.trace("HT put failed with an OOM, trying to {} a partition and retry Hash Table put() again.", tryingTo);
        // for debugging - in case there's a leak
        long memDiff = allocator.getAllocatedMemory() - allocatedBeforeHTput;
        if (memDiff > 0) {
            logger.warn("Leak: HashTable put() OOM left behind {} bytes allocated", memDiff);
        }
        // spill to free some memory
        doSpill(currentPartition);
        retrySameIndex = true;
        // to retry this put()
        return;
    } catch (OutOfMemoryException exc) {
        throw new OutOfMemoryException(getOOMErrorMsg("HT was: " + allocatedBeforeHTput), exc);
    } catch (SchemaChangeException e) {
        throw new UnsupportedOperationException("Unexpected schema change", e);
    }
    long allocatedBeforeAggCol = allocator.getAllocatedMemory();
    // if the put() itself grew the allocated memory, a spill check is warranted
    boolean needToCheckIfSpillIsNeeded = allocatedBeforeAggCol > allocatedBeforeHTput;
    // The hash table added a new batch, so add the matching batch of aggregation values
    if (putStatus == HashTable.PutStatus.NEW_BATCH_ADDED) {
        try {
            // try to preempt an OOM by using the reserve
            useReservedValuesMemory();
            // allocate a new (internal) values batch
            addBatchHolder(currentPartition, getTargetBatchCount());
            // restore the reserve, if possible
            restoreReservedMemory();
            // A reason to check for a spill - In case restore-reserve failed
            needToCheckIfSpillIsNeeded = (0 == reserveValueBatchMemory);
            // just allocated a planned batch
            if (plannedBatches > 0) {
                plannedBatches--;
            }
            long totalAddedMem = allocator.getAllocatedMemory() - allocatedBeforeHTput;
            long aggValuesAddedMem = allocator.getAllocatedMemory() - allocatedBeforeAggCol;
            logger.trace("MEMORY CHECK AGG: allocated now {}, added {}, total (with HT) added {}", allocator.getAllocatedMemory(), aggValuesAddedMem, totalAddedMem);
            // resize the batch estimates if needed (e.g., varchars may take more memory than estimated)
            if (totalAddedMem > estMaxBatchSize) {
                logger.trace("Adjusting Batch size estimate from {} to {}", estMaxBatchSize, totalAddedMem);
                estMaxBatchSize = totalAddedMem;
                needToCheckIfSpillIsNeeded = true;
            }
            if (aggValuesAddedMem > estValuesBatchSize) {
                logger.trace("Adjusting Values Batch size from {} to {}", estValuesBatchSize, aggValuesAddedMem);
                estValuesBatchSize = aggValuesAddedMem;
                needToCheckIfSpillIsNeeded = true;
            }
        } catch (OutOfMemoryException exc) {
            throw new OutOfMemoryException(getOOMErrorMsg("AGGR"), exc);
        }
    } else if (putStatus == HashTable.PutStatus.KEY_ADDED_LAST) {
        // If a batch just became full (i.e. another batch would be allocated soon) -- then need to
        // check (later, see below) if the memory limits are too close, and if so -- then spill !
        // planning to allocate one more batch
        plannedBatches++;
        needToCheckIfSpillIsNeeded = true;
    }
    // =================================================================
    // Locate the matching aggregate columns and perform the aggregation
    // =================================================================
    // The bits of the index above the low 16 (masked) select the batch holder;
    // the masked low bits give the position within that batch.
    int currentIdx = htIdxHolder.value;
    BatchHolder bh = batchHolders[currentPartition].get((currentIdx >>> 16) & BATCH_MASK);
    int idxWithinBatch = currentIdx & BATCH_MASK;
    if (bh.updateAggrValues(incomingRowIdx, idxWithinBatch)) {
        numGroupedRecords++;
    }
    // ===================================================================================
    // Spill now, if any of the checks above asked for it and memory prediction is enabled
    // ===================================================================================
    if (needToCheckIfSpillIsNeeded && canSpill && useMemoryPrediction) {
        spillIfNeeded(currentPartition);
    }
}

Also used : SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) ChainedHashTable(org.apache.drill.exec.physical.impl.common.ChainedHashTable) HashTable(org.apache.drill.exec.physical.impl.common.HashTable) RetryAfterSpillException(org.apache.drill.common.exceptions.RetryAfterSpillException) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException)

Example 2 with HashTable

use of org.apache.drill.exec.physical.impl.common.HashTable in project drill by axbaretto.

In class HashAggTemplate, method checkGroupAndAggrValues:

/**
 * Checks whether the group of an incoming row is present in the hash table and, if not,
 * inserts it; then applies the row to that group's aggregation values.
 * <p>
 * The {@code htIdxHolder} contains the index of the group in the hash table container; this
 * same index is also used for the aggregation values maintained by the hash aggregate.
 * <p>
 * If the method spills a partition instead of completing the put, it sets
 * {@code retrySameIndex} and returns without aggregating the row, so that the put is retried.
 *
 * @param incomingRowIdx index of the row in the incoming batch; asserted to be non-negative
 * @throws OutOfMemoryException if the hash table put fails for lack of memory and spilling is
 *         not possible, or if the put or the values-batch allocation itself runs out of memory
 * @throws UnsupportedOperationException if a {@code SchemaChangeException} is raised while
 *         hashing the row or inserting it into the hash table
 */
private void checkGroupAndAggrValues(int incomingRowIdx) {
    assert incomingRowIdx >= 0;
    assert !earlyOutput;
    // The hash code is computed once, then its lower bits are used to determine the
    // partition to use, and the higher bits determine the location in the hash table.
    int hashCode;
    try {
        // htables[0].updateBatches();
        hashCode = htables[0].getHashCode(incomingRowIdx);
    } catch (SchemaChangeException e) {
        throw new UnsupportedOperationException("Unexpected schema change", e);
    }
    // right shift hash code for secondary (or tertiary...) spilling:
    // one shift of bitsInMask bits per spill cycle already performed
    for (int i = 0; i < cycleNum; i++) {
        hashCode >>>= bitsInMask;
    }
    int currentPartition = hashCode & partitionMask;
    // drop the bits just used for the partition; the rest locate the entry in the hash table
    hashCode >>>= bitsInMask;
    // Definitely assigned below: every catch branch of the put() either returns or throws.
    HashTable.PutStatus putStatus;
    long allocatedBeforeHTput = allocator.getAllocatedMemory();
    // wording shared by the two trace messages below
    String tryingTo = is1stPhase ? "early return" : "spill";
    // Proactive spill - in case there is no reserve memory - spill and retry putting later
    if (reserveValueBatchMemory == 0 && canSpill) {
        logger.trace("Reserved memory runs short, trying to {} a partition and retry Hash Table put() again.", tryingTo);
        // spill to free some memory
        doSpill(currentPartition);
        retrySameIndex = true;
        // to retry this put()
        return;
    }
    // ==========================================
    // Insert the row's key into the hash table of its partition
    // ==========================================
    try {
        putStatus = htables[currentPartition].put(incomingRowIdx, htIdxHolder, hashCode);
    } catch (RetryAfterSpillException re) {
        if (!canSpill) {
            // keep the original failure as the cause so it is not lost from the stack trace
            throw new OutOfMemoryException(getOOMErrorMsg("Can not spill"), re);
        }
        logger.trace("HT put failed with an OOM, trying to {} a partition and retry Hash Table put() again.", tryingTo);
        // for debugging - in case there's a leak
        long memDiff = allocator.getAllocatedMemory() - allocatedBeforeHTput;
        if (memDiff > 0) {
            logger.warn("Leak: HashTable put() OOM left behind {} bytes allocated", memDiff);
        }
        // spill to free some memory
        doSpill(currentPartition);
        retrySameIndex = true;
        // to retry this put()
        return;
    } catch (OutOfMemoryException exc) {
        throw new OutOfMemoryException(getOOMErrorMsg("HT was: " + allocatedBeforeHTput), exc);
    } catch (SchemaChangeException e) {
        throw new UnsupportedOperationException("Unexpected schema change", e);
    }
    long allocatedBeforeAggCol = allocator.getAllocatedMemory();
    // if the put() itself grew the allocated memory, a spill check is warranted
    boolean needToCheckIfSpillIsNeeded = allocatedBeforeAggCol > allocatedBeforeHTput;
    // The hash table added a new batch, so add the matching batch of aggregation values
    if (putStatus == HashTable.PutStatus.NEW_BATCH_ADDED) {
        try {
            // try to preempt an OOM by using the reserve
            useReservedValuesMemory();
            // allocate a new (internal) values batch
            addBatchHolder(currentPartition);
            // restore the reserve, if possible
            restoreReservedMemory();
            // A reason to check for a spill - In case restore-reserve failed
            needToCheckIfSpillIsNeeded = (0 == reserveValueBatchMemory);
            // just allocated a planned batch
            if (plannedBatches > 0) {
                plannedBatches--;
            }
            long totalAddedMem = allocator.getAllocatedMemory() - allocatedBeforeHTput;
            long aggValuesAddedMem = allocator.getAllocatedMemory() - allocatedBeforeAggCol;
            logger.trace("MEMORY CHECK AGG: allocated now {}, added {}, total (with HT) added {}", allocator.getAllocatedMemory(), aggValuesAddedMem, totalAddedMem);
            // resize the batch estimates if needed (e.g., varchars may take more memory than estimated)
            if (totalAddedMem > estMaxBatchSize) {
                logger.trace("Adjusting Batch size estimate from {} to {}", estMaxBatchSize, totalAddedMem);
                estMaxBatchSize = totalAddedMem;
                needToCheckIfSpillIsNeeded = true;
            }
            if (aggValuesAddedMem > estValuesBatchSize) {
                logger.trace("Adjusting Values Batch size from {} to {}", estValuesBatchSize, aggValuesAddedMem);
                estValuesBatchSize = aggValuesAddedMem;
                needToCheckIfSpillIsNeeded = true;
            }
        } catch (OutOfMemoryException exc) {
            throw new OutOfMemoryException(getOOMErrorMsg("AGGR"), exc);
        }
    } else if (putStatus == HashTable.PutStatus.KEY_ADDED_LAST) {
        // If a batch just became full (i.e. another batch would be allocated soon) -- then need to
        // check (later, see below) if the memory limits are too close, and if so -- then spill !
        // planning to allocate one more batch
        plannedBatches++;
        needToCheckIfSpillIsNeeded = true;
    }
    // =================================================================
    // Locate the matching aggregate columns and perform the aggregation
    // =================================================================
    // The bits of the index above the low 16 (masked) select the batch holder;
    // the masked low bits give the position within that batch.
    int currentIdx = htIdxHolder.value;
    BatchHolder bh = batchHolders[currentPartition].get((currentIdx >>> 16) & HashTable.BATCH_MASK);
    int idxWithinBatch = currentIdx & HashTable.BATCH_MASK;
    if (bh.updateAggrValues(incomingRowIdx, idxWithinBatch)) {
        numGroupedRecords++;
    }
    // ===================================================================================
    // Spill now, if any of the checks above asked for it and memory prediction is enabled
    // ===================================================================================
    if (needToCheckIfSpillIsNeeded && canSpill && useMemoryPrediction) {
        spillIfNeeded(currentPartition);
    }
}

Aggregations

RetryAfterSpillException (org.apache.drill.common.exceptions.RetryAfterSpillException): 2, OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException): 2, SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 2, ChainedHashTable (org.apache.drill.exec.physical.impl.common.ChainedHashTable): 2, HashTable (org.apache.drill.exec.physical.impl.common.HashTable): 2