Search in sources :

Example 51 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorExpressionDescriptor method getVectorExpressionClass.

/**
 * Finds the vector expression class registered for a UDF that fits the requested descriptor.
 *
 * <p>The candidates are read from the {@code VectorizedExpressions} annotation on {@code udf}.
 * Each candidate is instantiated through its no-argument constructor so that its own descriptor
 * can be compared with {@code descriptor}; the first candidate that matches is returned.
 *
 * @param udf the UDF class whose {@code VectorizedExpressions} annotation lists the candidates
 * @param descriptor the descriptor the chosen vector expression has to match
 * @return the first matching candidate class, or {@code null} when the UDF carries no
 *         annotation (or an annotation without a value) or none of the candidates match
 * @throws HiveException if a candidate class cannot be instantiated or queried
 */
public Class<?> getVectorExpressionClass(Class<?> udf, Descriptor descriptor) throws HiveException {
    VectorizedExpressions annotation = AnnotationUtils.getAnnotation(udf, VectorizedExpressions.class);
    if (annotation == null || annotation.value() == null) {
        return null;
    }
    Class<? extends VectorExpression>[] list = annotation.value();
    for (Class<? extends VectorExpression> ve : list) {
        try {
            // Class.newInstance() is deprecated: it rethrows checked constructor exceptions
            // without declaring them.  Going through the Constructor wraps them instead.
            if (ve.getDeclaredConstructor().newInstance().getDescriptor().matches(descriptor)) {
                return ve;
            }
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate VectorExpression class " + ve.getSimpleName(), ex);
        }
    }
    // Nothing matched.  When debugging, list every candidate descriptor that was rejected.
    if (LOG.isDebugEnabled()) {
        LOG.debug("getVectorExpressionClass udf " + udf.getSimpleName() + " descriptor: " + descriptor.toString());
        for (Class<? extends VectorExpression> ve : list) {
            try {
                LOG.debug("getVectorExpressionClass doesn't match " + ve.getSimpleName() + " " + ve.getDeclaredConstructor().newInstance().getDescriptor().toString());
            } catch (Exception ex) {
                // Same message as the matching loop so both failure paths read alike.
                throw new HiveException("Could not instantiate VectorExpression class " + ve.getSimpleName(), ex);
            }
        }
    }
    return null;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 52 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorPTFOperator method process.

/**
 * Handles one batch handed over by the reduce processor; the batch belongs to a single
 * reducer key, i.e. one PTF group.
 *
 * With a plain OVER (PARTITION BY column) or OVER (ORDER BY column) the reduce processor's
 * group key is that partition or order by key.
 *
 * With OVER (PARTITION BY column1, ORDER BY column2) the reduce-shuffle group key combines
 * partition column1 and order by column2, so this method itself must notice when the
 * partition changes and reset the group aggregations.
 *
 * Before each call the reduce processor invokes setNextVectorBatchGroupStatus to say whether
 * the batch passed to this method is the last one for the group key.  That flag decides
 * whether the batch is buffered or the group results are written out.
 */
@Override
public void process(Object row, int tag) throws HiveException {
    VectorizedRowBatch groupBatch = (VectorizedRowBatch) row;

    // Evaluate ORDER BY expressions first, then any PARTITION BY expressions.
    for (VectorExpression orderByExpr : orderExpressions) {
        orderByExpr.evaluate(groupBatch);
    }
    if (partitionExpressions != null) {
        for (VectorExpression partitionByExpr : partitionExpressions) {
            partitionByExpr.evaluate(groupBatch);
        }
    }

    if (isPartitionOrderBy) {
        // With ORDER BY keys present, PARTITION BY key changes have to be detected here.
        if (!isFirstPartition) {
            if (isPartitionChanged(groupBatch)) {
                setCurrentPartition(groupBatch);
                groupBatches.resetEvaluators();
            }
        } else {
            isFirstPartition = false;
            setCurrentPartition(groupBatch);
        }
    }

    if (!allEvaluatorsAreStreaming) {
        // Run the aggregation functions over this batch of the group.
        groupBatches.evaluateGroupBatch(groupBatch, isLastGroupBatch);
        if (isLastGroupBatch) {
            // Final batch of the group: write the (non-streaming) aggregation values into the
            // output columns of every row of every batch in the group.  Each batch is forwarded
            // to the next operator as soon as it has been filled in.
            groupBatches.fillGroupResultsAndForward(this, groupBatch);
        } else {
            // The group continues in later batches.  Swap the relevant columns into our batch
            // buffers, or write the batch to temporary storage, and wait for the rest.
            groupBatches.bufferGroupBatch(groupBatch);
            return;
        }
    } else {
        // Streaming evaluators let us finish and forward the batch right away.
        groupBatches.evaluateStreamingGroupBatch(groupBatch, isLastGroupBatch);
        forward(groupBatch, null);
    }

    // A PARTITION BY without ORDER BY keys resets the evaluators here.
    if (!isPartitionOrderBy) {
        groupBatches.resetEvaluators();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 53 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorReduceSinkEmptyKeyOperator method process.

/**
 * Emits one key/value pair per row of the incoming batch, always using the shared
 * {@code keyWritable}.  Empty batches are counted and skipped.
 *
 * @param row the {@code VectorizedRowBatch} to process
 * @param tag passed to {@code initializeEmptyKey} the first time a non-empty batch arrives
 * @throws HiveException wrapping any exception raised while processing the batch
 */
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        final VectorizedRowBatch vrb = (VectorizedRowBatch) row;
        batchCounter++;
        if (vrb.size == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }

        // One-time key setup on the first non-empty batch; this operator requires an empty key.
        if (!isKeyInitialized) {
            isKeyInitialized = true;
            Preconditions.checkState(isEmptyKey);
            initializeEmptyKey(tag);
        }

        // Value expressions write their results into scratch columns of the batch.
        if (reduceSinkValueExpressions != null) {
            for (VectorExpression valueExpression : reduceSinkValueExpressions) {
                valueExpression.evaluate(vrb);
            }
        }

        final int rowCount = vrb.size;
        if (isEmptyValue) {
            // The value is empty as well: collect the same pair once per row.
            for (int emitted = 0; emitted < rowCount; emitted++) {
                collect(keyWritable, valueBytesWritable);
            }
            return;
        }

        // Serialize each row's value, going through the selected array when it is in use.
        final boolean useSelected = vrb.selectedInUse;
        final int[] selectedRows = vrb.selected;
        for (int logical = 0; logical < rowCount; logical++) {
            final int batchIndex = useSelected ? selectedRows[logical] : logical;
            valueLazyBinarySerializeWrite.reset();
            valueVectorSerializeRow.serializeWrite(vrb, batchIndex);
            valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
            collect(keyWritable, valueBytesWritable);
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 54 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorReduceSinkObjectHashOperator method process.

/**
 * Emits one key/value pair per row of the incoming batch.  For every row a hash code is
 * derived from the bucket and partition columns (or from {@code nonPartitionRandom} when there
 * are no partition columns), the key and value are serialized unless they are empty, and the
 * pair is collected.  Empty batches are counted and skipped.
 *
 * @param row the {@code VectorizedRowBatch} to process
 * @param tag passed to {@code initializeEmptyKey}, and compared with -1 to decide whether the
 *            tag byte is appended to the serialized key
 * @throws HiveException wrapping any exception raised while processing the batch
 */
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        final VectorizedRowBatch vrb = (VectorizedRowBatch) row;
        batchCounter++;
        if (vrb.size == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }

        if (!isKeyInitialized) {
            isKeyInitialized = true;
            if (isEmptyKey) {
                initializeEmptyKey(tag);
            }
        }

        // Evaluate key, value, bucket and partition expressions, in that order.  Each group
        // writes its results into scratch columns of the batch.
        if (reduceSinkKeyExpressions != null) {
            for (VectorExpression keyExpression : reduceSinkKeyExpressions) {
                keyExpression.evaluate(vrb);
            }
        }
        if (reduceSinkValueExpressions != null) {
            for (VectorExpression valueExpression : reduceSinkValueExpressions) {
                valueExpression.evaluate(vrb);
            }
        }
        if (reduceSinkBucketExpressions != null) {
            for (VectorExpression bucketExpression : reduceSinkBucketExpressions) {
                bucketExpression.evaluate(vrb);
            }
        }
        if (reduceSinkPartitionExpressions != null) {
            for (VectorExpression partitionExpression : reduceSinkPartitionExpressions) {
                partitionExpression.evaluate(vrb);
            }
        }

        final boolean useSelected = vrb.selectedInUse;
        final int[] selectedRows = vrb.selected;
        final int rowCount = vrb.size;
        final boolean hasBuckets = !isEmptyBuckets;

        for (int logical = 0; logical < rowCount; logical++) {
            final int batchIndex;
            if (useSelected) {
                batchIndex = selectedRows[logical];
            } else {
                batchIndex = logical;
            }

            // Bucket number first (only when there are bucket columns) ...
            int bucketNum = 0;
            if (hasBuckets) {
                bucketVectorExtractRow.extractRow(vrb, batchIndex, bucketFieldValues);
                bucketNum = ObjectInspectorUtils.getBucketNumber(bucketFieldValues, bucketObjectInspectors, numBuckets);
            }

            // ... then the partition part: a random int when there are no partition columns.
            final int partitionHash;
            if (isEmptyPartitions) {
                partitionHash = nonPartitionRandom.nextInt();
            } else {
                partitionVectorExtractRow.extractRow(vrb, batchIndex, partitionFieldValues);
                partitionHash = ObjectInspectorUtils.getBucketHashCode(partitionFieldValues, partitionObjectInspectors);
            }

            // The bucket number is folded in only when bucket columns exist.
            final int hashCode = hasBuckets ? partitionHash * 31 + bucketNum : partitionHash;

            if (!isEmptyKey) {
                keyBinarySortableSerializeWrite.reset();
                keyVectorSerializeRow.serializeWrite(vrb, batchIndex);
                // One serialized key for 1 or more rows for the duplicate keys.
                final int serializedKeyLength = keyOutput.getLength();
                if (tag != -1 && !reduceSkipTag) {
                    // Reserve one extra byte after the key bytes for the tag.
                    keyWritable.setSize(serializedKeyLength + 1);
                    System.arraycopy(keyOutput.getData(), 0, keyWritable.get(), 0, serializedKeyLength);
                    keyWritable.get()[serializedKeyLength] = reduceTagByte;
                } else {
                    keyWritable.set(keyOutput.getData(), 0, serializedKeyLength);
                }
                keyWritable.setDistKeyLength(serializedKeyLength);
            }
            keyWritable.setHashCode(hashCode);

            if (!isEmptyValue) {
                valueLazyBinarySerializeWrite.reset();
                valueVectorSerializeRow.serializeWrite(vrb, batchIndex);
                valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
            }
            collect(keyWritable, valueBytesWritable);
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 55 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinGenerateResultOperator method performValueExpressions.

// ------------------------------------------------------------------------------------------------
/**
 * Evaluates the big table value expressions over the matched rows of a batch.
 *
 * The batch is temporarily made to look as if exactly the matched rows were selected, the
 * expressions are run, and then the original {@code selected} array and {@code selectedInUse}
 * flag are put back.  {@code batch.size} is left at {@code allMatchCount}: because the overflow
 * batch may be used when generating results, the selected array and the real batch size are
 * assigned later.
 *
 * @param batch the batch to evaluate the expressions on
 * @param allMatchs batch indices of the matched rows, used as the temporary selection
 * @param allMatchCount number of entries of {@code allMatchs} that are in use
 */
protected void performValueExpressions(VectorizedRowBatch batch, int[] allMatchs, int allMatchCount) {
    // Remember the selection state that gets overwritten below.
    final int[] originalSelected = batch.selected;
    final boolean originalSelectedInUse = batch.selectedInUse;

    // For now, treat every matched row as selected.
    batch.selected = allMatchs;
    batch.selectedInUse = true;
    batch.size = allMatchCount;

    for (VectorExpression valueExpression : bigTableValueExpressions) {
        valueExpression.evaluate(batch);
    }

    batch.selectedInUse = originalSelectedInUse;
    batch.selected = originalSelected;
}
Also used : VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Aggregations

VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)78 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)41 ArrayList (java.util.ArrayList)37 Test (org.junit.Test)28 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)26 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)26 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)24 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)21 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)17 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)17 IOException (java.io.IOException)13 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)12 GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)10 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)8 List (java.util.List)7 UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString)7 VectorMapJoinHashTableResult (org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult)6 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)6 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)6 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)6