Search in sources :

Example 1 with TopNKeyFilter

use of org.apache.hadoop.hive.ql.exec.TopNKeyFilter in project hive by apache.

the class VectorTopNKeyOperator method closeOp.

/**
 * Clears every TopNKeyFilter held by this operator, empties the filter map and the
 * set of disabled partitions, and then delegates to the superclass.
 *
 * <p>When exactly one filter is registered it is logged individually at INFO level;
 * otherwise only the filter count is logged at INFO and each filter at DEBUG.
 *
 * @param abort passed through unchanged to {@code super.closeOp}
 * @throws HiveException declared by the override
 */
@Override
protected void closeOp(boolean abort) throws HiveException {
    final int filterCount = topNKeyFilters.size();
    if (filterCount != 1) {
        // Zero or several filters: summarise at INFO, keep per-filter detail at DEBUG.
        LOG.info("Closing {} TopNKeyFilters", filterCount);
        for (TopNKeyFilter current : topNKeyFilters.values()) {
            LOG.debug("Closing TopNKeyFilter: {}", current);
            current.clear();
        }
    } else {
        // Exactly one filter: report it directly.
        TopNKeyFilter onlyFilter = topNKeyFilters.values().iterator().next();
        LOG.info("Closing TopNKeyFilter: {}", onlyFilter);
        onlyFilter.clear();
    }
    topNKeyFilters.clear();
    disabledPartitions.clear();
    super.closeOp(abort);
}
Also used : TopNKeyFilter(org.apache.hadoop.hive.ql.exec.TopNKeyFilter)

Example 2 with TopNKeyFilter

use of org.apache.hadoop.hive.ql.exec.TopNKeyFilter in project hive by apache.

the class VectorTopNKeyOperator method checkTopNFilterEfficiency.

/**
 * Checks each partition's TopN filter and disables those that are not filtering enough.
 *
 * <p>A partition is added to {@code disabledPartitions} when its filter's
 * {@code getTotal()} has reached {@code checkEfficiencyNumRows} and its
 * {@code forwardingRatio()} is at or above {@code efficiencyThreshold}.
 * Partitions that are already in {@code disabledPartitions} are skipped, so a
 * disabled filter is reported only once instead of on every check.
 *
 * @param filters                filters keyed by partition key; not modified here
 * @param disabledPartitions     set that receives the keys of newly disabled partitions
 * @param efficiencyThreshold    forwarding ratio at or above which a filter is disabled
 * @param log                    logger used for the debug and info messages
 * @param checkEfficiencyNumRows minimum {@code getTotal()} before a filter is judged
 */
public static void checkTopNFilterEfficiency(Map<KeyWrapper, TopNKeyFilter> filters, Set<KeyWrapper> disabledPartitions, float efficiencyThreshold, Logger log, long checkEfficiencyNumRows) {
    for (Map.Entry<KeyWrapper, TopNKeyFilter> entry : filters.entrySet()) {
        KeyWrapper partitionKey = entry.getKey();
        if (disabledPartitions.contains(partitionKey)) {
            // Already disabled by an earlier check: re-evaluating it would only repeat the log line.
            continue;
        }
        TopNKeyFilter filter = entry.getValue();
        log.debug("Checking TopN Filter efficiency {}, threshold: {}", filter, efficiencyThreshold);
        if (filter.getTotal() >= checkEfficiencyNumRows && filter.forwardingRatio() >= efficiencyThreshold) {
            log.info("Disabling TopN Filter {}", filter);
            disabledPartitions.add(partitionKey);
        }
    }
}
Also used : KeyWrapper(org.apache.hadoop.hive.ql.exec.KeyWrapper) TopNKeyFilter(org.apache.hadoop.hive.ql.exec.TopNKeyFilter) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with TopNKeyFilter

use of org.apache.hadoop.hive.ql.exec.TopNKeyFilter in project hive by apache.

the class VectorTopNKeyOperator method process.

/**
 * Applies the per-partition TopNKeyFilters to one incoming batch and forwards the rows that pass.
 *
 * <p>If at least one partition is disabled and the number of disabled partitions equals the
 * number of filters, the batch is forwarded unchanged. Otherwise a row is kept when its
 * partition is in {@code disabledPartitions}, when no filter is available for its partition
 * (none exists and {@code conf.getMaxNumberOfPartitions()} has been reached), or when the
 * partition's filter returns true from {@code canForward}. On that path the batch's
 * {@code selected}, {@code size} and {@code selectedInUse} fields are restored to their
 * incoming values before the method returns.
 *
 * @param data the incoming batch; cast to {@code VectorizedRowBatch}
 * @param tag not read by this method
 * @throws HiveException declared by the override; this method contains no explicit throw
 */
@Override
public void process(Object data, int tag) throws HiveException {
    VectorizedRowBatch batch = (VectorizedRowBatch) data;
    if (!disabledPartitions.isEmpty() && disabledPartitions.size() == topNKeyFilters.size()) {
        // all filters are disabled due to efficiency check
        vectorForward(batch);
        return;
    }
    // Only batches that actually go through filtering are counted; the counter schedules
    // the efficiency check at the end of this method.
    incomingBatches++;
    // The selected vector represents selected rows.
    // Copy its first batch.size entries into temporarySelected and point the batch at that copy;
    // the original array, size and selectedInUse flag are remembered for the restore below.
    System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size);
    int[] selectedBackup = batch.selected;
    batch.selected = temporarySelected;
    int sizeBackup = batch.size;
    boolean selectedInUseBackup = batch.selectedInUse;
    // Evaluate the key expressions on the batch before the key wrappers are built from it.
    for (VectorExpression keyExpression : vectorDesc.getKeyExpressions()) {
        keyExpression.evaluate(batch);
    }
    partitionKeyWrapperBatch.evaluateBatch(batch);
    VectorHashKeyWrapperBase[] partitionKeyWrappers = partitionKeyWrapperBatch.getVectorHashKeyWrappers();
    keyWrappersBatch.evaluateBatch(batch);
    VectorHashKeyWrapperBase[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers();
    // Filter rows with top n keys
    int size = 0;
    // Collects the row indices that survive filtering.
    int[] selected = new int[batch.selected.length];
    for (int i = 0; i < batch.size; i++) {
        // j is the row index written to the output selection: taken from selected[] when a
        // selection is in use, otherwise the loop position itself.
        int j;
        if (batch.selectedInUse) {
            j = batch.selected[i];
        } else {
            j = i;
        }
        // Both key wrapper arrays are indexed by the loop position i, not by j.
        VectorHashKeyWrapperBase partitionKey = partitionKeyWrappers[i];
        if (disabledPartitions.contains(partitionKey)) {
            // filter for this partition is disabled
            selected[size++] = j;
        } else {
            TopNKeyFilter topNKeyFilter = topNKeyFilters.get(partitionKey);
            // Create a filter the first time a partition is seen, as long as the configured
            // maximum number of partitions has not been reached.
            if (topNKeyFilter == null && topNKeyFilters.size() < conf.getMaxNumberOfPartitions()) {
                topNKeyFilter = new TopNKeyFilter(conf.getTopN(), keyWrapperComparator);
                // The map key is a copy of the wrapper (presumably because the batch reuses
                // its wrapper instances - TODO confirm).
                topNKeyFilters.put(partitionKey.copyKey(), topNKeyFilter);
            }
            // A null filter here means the partition limit was hit; such rows pass unfiltered.
            if (topNKeyFilter == null || topNKeyFilter.canForward(keyWrappers[i])) {
                selected[size++] = j;
            }
        }
    }
    // Apply selection to batch (only needed when at least one row was dropped)
    if (batch.size != size) {
        batch.selectedInUse = true;
        batch.selected = selected;
        batch.size = size;
    }
    // Forward the result; nothing is forwarded when every row was filtered out
    if (size > 0) {
        vectorForward(batch);
    }
    // Restore the original selected vector
    batch.selected = selectedBackup;
    batch.size = sizeBackup;
    batch.selectedInUse = selectedInUseBackup;
    // Every conf.getCheckEfficiencyNumBatches() counted batches, re-evaluate which filters
    // should be disabled.
    if (incomingBatches % conf.getCheckEfficiencyNumBatches() == 0) {
        checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), LOG, conf.getCheckEfficiencyNumRows());
    }
}
Also used : VectorHashKeyWrapperBase(org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase) TopNKeyFilter(org.apache.hadoop.hive.ql.exec.TopNKeyFilter) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Aggregations

TopNKeyFilter (org.apache.hadoop.hive.ql.exec.TopNKeyFilter): 3; HashMap (java.util.HashMap): 1; Map (java.util.Map): 1; KeyWrapper (org.apache.hadoop.hive.ql.exec.KeyWrapper): 1; VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 1; VectorHashKeyWrapperBase (org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase): 1