Search in sources :

Example 1 with HashTableStats

use of org.apache.drill.exec.physical.impl.common.HashTableStats in project drill by apache.

the class HashJoinBatch method updateStats.

/**
 * Updates the {@link HashTable} and spilling stats after the original build
 * side is processed.
 *
 * Note: this does not update all the stats. The cycleNum is updated
 * dynamically in {@link #innerNext()} and the total bytes written is updated
 * at close time in {@link #cleanup()}.
 */
private void updateStats() {
    if (buildSideIsEmpty.booleanValue()) {
        // no stats when the right side is empty
        return;
    }
    if (!spilledState.isFirstCycle()) {
        // These stats are only for before processing spilled files
        return;
    }
    HashTableStats htStats = new HashTableStats();
    long numSpilled = 0;
    HashTableStats newStats = new HashTableStats();
    // sum the stats from all the partitions
    for (HashPartition partn : partitions) {
        if (partn.isSpilled()) {
            numSpilled++;
        }
        partn.getStats(newStats);
        htStats.addStats(newStats);
    }
    stats.setLongStat(Metric.NUM_BUCKETS, htStats.numBuckets);
    stats.setLongStat(Metric.NUM_ENTRIES, htStats.numEntries);
    stats.setLongStat(Metric.NUM_RESIZING, htStats.numResizing);
    stats.setLongStat(Metric.RESIZING_TIME_MS, htStats.resizingTime);
    stats.setLongStat(Metric.NUM_PARTITIONS, numPartitions);
    // Put 0 in case no spill
    stats.setLongStat(Metric.SPILL_CYCLE, spilledState.getCycle());
    stats.setLongStat(Metric.SPILLED_PARTITIONS, numSpilled);
}
Also used : HashTableStats(org.apache.drill.exec.physical.impl.common.HashTableStats) HashPartition(org.apache.drill.exec.physical.impl.common.HashPartition)
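The updateStats() methods on this page all total per-partition hash-table statistics by repeatedly calling addStats on a running HashTableStats instance. As a rough illustration of that accumulation pattern, here is a minimal sketch assuming the class is a plain holder of the four counters read above (the actual Drill class may differ):

// Hypothetical simplified accumulator mirroring the fields read in updateStats().
// The real org.apache.drill.exec.physical.impl.common.HashTableStats may differ.
public class HashTableStatsSketch {
    public long numBuckets;
    public long numEntries;
    public long numResizing;
    public long resizingTime;

    // Fold another partition's stats into this running total.
    public void addStats(HashTableStatsSketch other) {
        numBuckets += other.numBuckets;
        numEntries += other.numEntries;
        numResizing += other.numResizing;
        resizingTime += other.resizingTime;
    }
}

This matches the usage above: one newStats instance is filled per partition via getStats(newStats), then folded into htStats before the totals are published as operator metrics.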

Example 2 with HashTableStats

use of org.apache.drill.exec.physical.impl.common.HashTableStats in project drill by apache.

the class HashAggTemplate method updateStats.

/**
 * Updates the stats after all the input has been read.
 * Note: for spilled partitions, the hash-table stats from before the spill are lost,
 * and SPILLED_PARTITIONS only counts partitions spilled in the primary cycle, not SECONDARY etc.
 * @param htables the hash tables of all partitions (one per partition)
 */
private void updateStats(HashTable[] htables) {
    // These stats are only for before processing spilled files,
    // and no stats are collected when handling an EMIT
    if (!spilledState.isFirstCycle() || handleEmit) {
        return;
    }
    long numSpilled = 0;
    HashTableStats newStats = new HashTableStats();
    // sum the stats from all the partitions
    for (int ind = 0; ind < spilledState.getNumPartitions(); ind++) {
        htables[ind].getStats(newStats);
        htStats.addStats(newStats);
        if (isSpilled(ind)) {
            numSpilled++;
        }
    }
    stats.setLongStat(Metric.NUM_BUCKETS, htStats.numBuckets);
    stats.setLongStat(Metric.NUM_ENTRIES, htStats.numEntries);
    stats.setLongStat(Metric.NUM_RESIZING, htStats.numResizing);
    stats.setLongStat(Metric.RESIZING_TIME_MS, htStats.resizingTime);
    stats.setLongStat(Metric.NUM_PARTITIONS, spilledState.getNumPartitions());
    // Put 0 in case no spill
    stats.setLongStat(Metric.SPILL_CYCLE, spilledState.getCycle());
    if (phase.is2nd()) {
        stats.setLongStat(Metric.SPILLED_PARTITIONS, numSpilled);
    }
    if (rowsReturnedEarly > 0) {
        // update stats - est. total MB returned early
        stats.setLongStat(Metric.SPILL_MB, (int) Math.round(rowsReturnedEarly * estOutputRowWidth / 1024.0D / 1024.0));
    }
}
Also used : HashTableStats(org.apache.drill.exec.physical.impl.common.HashTableStats)
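The SPILL_MB metric set at the end of this method is only an estimate: rows returned early multiplied by the estimated output row width, converted from bytes to megabytes. A short, self-contained calculation with made-up values (rowsReturnedEarly and estOutputRowWidth below are hypothetical, purely to show the arithmetic):

// Hypothetical values, chosen only to illustrate the formula.
long rowsReturnedEarly = 1_000_000L; // rows the operator returned early (hypothetical count)
long estOutputRowWidth = 64L;        // estimated bytes per output row (hypothetical)
// Same formula as in updateStats(): bytes -> MB, rounded to the nearest whole MB.
long spillMb = Math.round(rowsReturnedEarly * estOutputRowWidth / 1024.0D / 1024.0D);
System.out.println(spillMb);         // prints 61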

Example 3 with HashTableStats

use of org.apache.drill.exec.physical.impl.common.HashTableStats in project drill by axbaretto.

the class HashAggTemplate method updateStats.

/**
 * Updates the stats after all the input has been read.
 * Note: for spilled partitions, the hash-table stats from before the spill are lost,
 * and SPILLED_PARTITIONS only counts partitions spilled in the primary cycle, not SECONDARY etc.
 * @param htables the hash tables of all partitions (one per partition)
 */
private void updateStats(HashTable[] htables) {
    // These stats are only for before processing spilled files
    if (cycleNum > 0) {
        return;
    }
    long numSpilled = 0;
    HashTableStats newStats = new HashTableStats();
    // sum the stats from all the partitions
    for (int ind = 0; ind < numPartitions; ind++) {
        htables[ind].getStats(newStats);
        htStats.addStats(newStats);
        if (isSpilled(ind)) {
            numSpilled++;
        }
    }
    this.stats.setLongStat(Metric.NUM_BUCKETS, htStats.numBuckets);
    this.stats.setLongStat(Metric.NUM_ENTRIES, htStats.numEntries);
    this.stats.setLongStat(Metric.NUM_RESIZING, htStats.numResizing);
    this.stats.setLongStat(Metric.RESIZING_TIME_MS, htStats.resizingTime);
    this.stats.setLongStat(Metric.NUM_PARTITIONS, numPartitions);
    // Put 0 in case no spill
    this.stats.setLongStat(Metric.SPILL_CYCLE, cycleNum);
    if (is2ndPhase) {
        this.stats.setLongStat(Metric.SPILLED_PARTITIONS, numSpilled);
    }
    if (rowsReturnedEarly > 0) {
        // update stats - est. total MB returned early
        stats.setLongStat(Metric.SPILL_MB, (int) Math.round(rowsReturnedEarly * estOutputRowWidth / 1024.0D / 1024.0));
    }
}
Also used : HashTableStats(org.apache.drill.exec.physical.impl.common.HashTableStats)

Aggregations

HashTableStats (org.apache.drill.exec.physical.impl.common.HashTableStats): 3 usages
HashPartition (org.apache.drill.exec.physical.impl.common.HashPartition): 1 usage