Use of org.apache.hadoop.hive.ql.exec.TopNKeyFilter in project hive by apache.
Class VectorTopNKeyOperator, method closeOp: clears every per-partition TopNKeyFilter, empties the bookkeeping collections, and delegates to the parent operator's closeOp.
@Override
protected void closeOp(boolean abort) throws HiveException {
  // LOG.info("Closing TopNKeyFilter: {}.", topNKeyFilter);
  if (topNKeyFilters.size() == 1) {
    TopNKeyFilter filter = topNKeyFilters.values().iterator().next();
    LOG.info("Closing TopNKeyFilter: {}", filter);
    filter.clear();
  } else {
    LOG.info("Closing {} TopNKeyFilters", topNKeyFilters.size());
    for (TopNKeyFilter each : topNKeyFilters.values()) {
      LOG.debug("Closing TopNKeyFilter: {}", each);
      each.clear();
    }
  }
  topNKeyFilters.clear();
  disabledPartitions.clear();
  super.closeOp(abort);
}
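The snippets on this page only call into TopNKeyFilter through its (topN, comparator) constructor and the methods canForward, clear, and, in the efficiency check below, getTotal and forwardingRatio; they do not show its internals. The following is a minimal, self-contained sketch of the idea behind such a filter. SimpleTopNKeyFilter is a hypothetical name and the heap-based implementation is only an illustration of the concept under those assumptions, not the actual Hive class.

import java.util.Comparator;
import java.util.PriorityQueue;

// Illustrative sketch only, not the Hive class: keeps the topN smallest keys
// (per the supplied comparator) seen so far, forwards a key only if it could
// still belong to that top N, and tracks how often it forwards.
final class SimpleTopNKeyFilter<K> {
  private final int topN;
  private final Comparator<K> comparator;
  private final PriorityQueue<K> kept;   // max-heap: head is the worst key currently kept
  private long total;                    // keys checked so far
  private long forwarded;                // keys that were forwarded

  SimpleTopNKeyFilter(int topN, Comparator<K> comparator) {
    this.topN = topN;
    this.comparator = comparator;
    this.kept = new PriorityQueue<>(comparator.reversed());
  }

  boolean canForward(K key) {
    total++;
    if (kept.size() < topN) {
      kept.add(key);
      forwarded++;
      return true;
    }
    if (comparator.compare(key, kept.peek()) < 0) {
      kept.poll();        // evict the current worst key
      kept.add(key);
      forwarded++;
      return true;
    }
    return false;         // key cannot be in the top N
  }

  long getTotal() {
    return total;
  }

  float forwardingRatio() {
    return total == 0 ? 0f : (float) forwarded / total;
  }

  void clear() {
    kept.clear();
  }
}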
Use of org.apache.hadoop.hive.ql.exec.TopNKeyFilter in project hive by apache.
Class VectorTopNKeyOperator, method checkTopNFilterEfficiency: disables a partition's filter once it has processed at least checkEfficiencyNumRows rows and its forwarding ratio has reached the efficiency threshold, i.e. the filter is letting most rows through and no longer pays for itself.
public static void checkTopNFilterEfficiency(Map<KeyWrapper, TopNKeyFilter> filters, Set<KeyWrapper> disabledPartitions,
    float efficiencyThreshold, Logger log, long checkEfficiencyNumRows) {
  Iterator<Map.Entry<KeyWrapper, TopNKeyFilter>> iterator = filters.entrySet().iterator();
  while (iterator.hasNext()) {
    Map.Entry<KeyWrapper, TopNKeyFilter> each = iterator.next();
    KeyWrapper partitionKey = each.getKey();
    TopNKeyFilter filter = each.getValue();
    log.debug("Checking TopN Filter efficiency {}, threshold: {}", filter, efficiencyThreshold);
    if (filter.getTotal() >= checkEfficiencyNumRows && filter.forwardingRatio() >= efficiencyThreshold) {
      log.info("Disabling TopN Filter {}", filter);
      disabledPartitions.add(partitionKey);
    }
  }
}
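The check boils down to comparing two statistics per filter: how many keys it has seen and what fraction it forwarded. Below is a rough standalone illustration of that decision using the SimpleTopNKeyFilter sketch above; the class, the 0.8 threshold, and the 1,000-row minimum are all hypothetical stand-ins, while the real method iterates a Map<KeyWrapper, TopNKeyFilter> and logs through SLF4J.

import java.util.Comparator;

public class EfficiencyCheckDemo {
  public static void main(String[] args) {
    SimpleTopNKeyFilter<Integer> filter = new SimpleTopNKeyFilter<>(5, Comparator.naturalOrder());

    // Feed ascending keys: after the first 5, nothing new can enter the top 5,
    // so almost every row is rejected and the filter stays efficient.
    for (int key = 0; key < 10_000; key++) {
      filter.canForward(key);
    }

    float efficiencyThreshold = 0.8f;     // hypothetical threshold
    long checkEfficiencyNumRows = 1_000;  // hypothetical minimum sample size

    boolean disable = filter.getTotal() >= checkEfficiencyNumRows
        && filter.forwardingRatio() >= efficiencyThreshold;
    // forwardingRatio() is 5 / 10000 = 0.0005 here, so the filter stays enabled.
    System.out.printf("total=%d ratio=%.4f disable=%b%n",
        filter.getTotal(), filter.forwardingRatio(), disable);
  }
}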
Use of org.apache.hadoop.hive.ql.exec.TopNKeyFilter in project hive by apache.
Class VectorTopNKeyOperator, method process: evaluates the partition and key expressions on a VectorizedRowBatch, narrows the batch's selected vector to the rows whose keys a per-partition TopNKeyFilter agrees to forward, forwards the narrowed batch, and then restores the original selection state.
@Override
public void process(Object data, int tag) throws HiveException {
  VectorizedRowBatch batch = (VectorizedRowBatch) data;
  if (!disabledPartitions.isEmpty() && disabledPartitions.size() == topNKeyFilters.size()) {
    // all filters are disabled due to efficiency check
    vectorForward(batch);
    return;
  }
  incomingBatches++;

  // The selected vector represents selected rows.
  // Clone the selected vector
  System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size);
  int[] selectedBackup = batch.selected;
  batch.selected = temporarySelected;
  int sizeBackup = batch.size;
  boolean selectedInUseBackup = batch.selectedInUse;

  for (VectorExpression keyExpression : vectorDesc.getKeyExpressions()) {
    keyExpression.evaluate(batch);
  }

  partitionKeyWrapperBatch.evaluateBatch(batch);
  VectorHashKeyWrapperBase[] partitionKeyWrappers = partitionKeyWrapperBatch.getVectorHashKeyWrappers();

  keyWrappersBatch.evaluateBatch(batch);
  VectorHashKeyWrapperBase[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers();

  // Filter rows with top n keys
  int size = 0;
  int[] selected = new int[batch.selected.length];
  for (int i = 0; i < batch.size; i++) {
    int j;
    if (batch.selectedInUse) {
      j = batch.selected[i];
    } else {
      j = i;
    }

    VectorHashKeyWrapperBase partitionKey = partitionKeyWrappers[i];
    if (disabledPartitions.contains(partitionKey)) {
      // filter for this partition is disabled
      selected[size++] = j;
    } else {
      TopNKeyFilter topNKeyFilter = topNKeyFilters.get(partitionKey);
      if (topNKeyFilter == null && topNKeyFilters.size() < conf.getMaxNumberOfPartitions()) {
        topNKeyFilter = new TopNKeyFilter(conf.getTopN(), keyWrapperComparator);
        topNKeyFilters.put(partitionKey.copyKey(), topNKeyFilter);
      }
      if (topNKeyFilter == null || topNKeyFilter.canForward(keyWrappers[i])) {
        selected[size++] = j;
      }
    }
  }

  // Apply selection to batch
  if (batch.size != size) {
    batch.selectedInUse = true;
    batch.selected = selected;
    batch.size = size;
  }

  // Forward the result
  if (size > 0) {
    vectorForward(batch);
  }

  // Restore the original selected vector
  batch.selected = selectedBackup;
  batch.size = sizeBackup;
  batch.selectedInUse = selectedInUseBackup;

  if (incomingBatches % conf.getCheckEfficiencyNumBatches() == 0) {
    checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions,
        conf.getEfficiencyThreshold(), LOG, conf.getCheckEfficiencyNumRows());
  }
}
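The bulk of process() is the selected-vector protocol of VectorizedRowBatch: back up selected, size, and selectedInUse, rewrite the selection to the surviving row indices, forward the batch, then restore the backup so the upstream operator sees the batch unchanged. Below is a stripped-down sketch of that same pattern against the public VectorizedRowBatch and LongColumnVector classes; SelectedVectorPatternDemo, filterAndForward, and the value-below-cutoff predicate (standing in for the TopNKeyFilter lookups) are illustrative inventions, not Hive code.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Stripped-down illustration of the selected-vector pattern used in process():
// back up the selection state, narrow the selection with a predicate,
// hand the batch downstream, then restore the original state.
public class SelectedVectorPatternDemo {

  static void filterAndForward(VectorizedRowBatch batch, long cutoff) {
    // Back up the selection state, as VectorTopNKeyOperator does.
    int[] selectedBackup = batch.selected;
    int sizeBackup = batch.size;
    boolean selectedInUseBackup = batch.selectedInUse;

    LongColumnVector col = (LongColumnVector) batch.cols[0];
    int[] selected = new int[batch.selected.length];
    int size = 0;
    for (int i = 0; i < batch.size; i++) {
      int j = batch.selectedInUse ? batch.selected[i] : i;
      if (col.vector[j] < cutoff) {          // stand-in for topNKeyFilter.canForward(...)
        selected[size++] = j;
      }
    }

    // Apply the narrowed selection to the batch.
    if (batch.size != size) {
      batch.selectedInUse = true;
      batch.selected = selected;
      batch.size = size;
    }
    if (size > 0) {
      // A downstream operator would receive the narrowed batch here (vectorForward in Hive).
      System.out.println("forwarding " + batch.size + " rows");
    }

    // Restore the original selection so the caller sees the batch unchanged.
    batch.selected = selectedBackup;
    batch.size = sizeBackup;
    batch.selectedInUse = selectedInUseBackup;
  }

  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(1);
    LongColumnVector col = new LongColumnVector();   // defaults to VectorizedRowBatch.DEFAULT_SIZE slots
    batch.cols[0] = col;
    for (int i = 0; i < 10; i++) {
      col.vector[i] = i;
    }
    batch.size = 10;
    batch.selectedInUse = false;
    filterAndForward(batch, 5L);   // prints "forwarding 5 rows" (rows 0..4)
  }
}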