Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorExpressionDescriptor, method getVectorExpressionClass.
/**
 * Finds the vectorized expression class registered for a UDF that matches a descriptor.
 *
 * <p>The candidates are the classes listed in the {@code VectorizedExpressions} annotation on
 * {@code udf}. Each candidate is instantiated once through its no-argument constructor and its
 * own descriptor is compared against {@code descriptor}; the first candidate that matches wins.
 * When debug logging is enabled and nothing matched, the requested descriptor and every rejected
 * candidate are logged.
 *
 * @param udf the UDF class whose {@code VectorizedExpressions} annotation is inspected
 * @param descriptor the descriptor a candidate must match
 * @return the first matching candidate class, or {@code null} when the UDF has no usable
 *         annotation or none of the candidates matches
 * @throws HiveException if a candidate cannot be instantiated or inspected
 */
public Class<?> getVectorExpressionClass(Class<?> udf, Descriptor descriptor) throws HiveException {
  VectorizedExpressions annotation = AnnotationUtils.getAnnotation(udf, VectorizedExpressions.class);
  if (annotation == null || annotation.value() == null) {
    return null;
  }
  Class<? extends VectorExpression>[] list = annotation.value();

  // Collect the mismatch report during the single pass so that enabling debug logging neither
  // instantiates every candidate a second time nor adds a failure path of its own.
  final boolean debugEnabled = LOG.isDebugEnabled();
  final String[] mismatches = debugEnabled ? new String[list.length] : null;

  for (int i = 0; i < list.length; i++) {
    Class<? extends VectorExpression> ve = list[i];
    try {
      // Class.newInstance() is deprecated and rethrows checked constructor exceptions
      // unwrapped; going through the Constructor wraps them in InvocationTargetException.
      VectorExpression candidate = ve.getDeclaredConstructor().newInstance();
      if (candidate.getDescriptor().matches(descriptor)) {
        return ve;
      }
      if (debugEnabled) {
        mismatches[i] = ve.getSimpleName() + " " + candidate.getDescriptor().toString();
      }
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate VectorExpression class " + ve.getSimpleName(), ex);
    }
  }

  if (debugEnabled) {
    LOG.debug("getVectorExpressionClass udf " + udf.getSimpleName() + " descriptor: " + descriptor.toString());
    for (String mismatch : mismatches) {
      LOG.debug("getVectorExpressionClass doesn't match " + mismatch);
    }
  }
  return null;
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorPTFOperator, method process.
/**
 * Handles one batch handed over by the reduce processor. Every batch belongs to exactly one
 * reducer key, i.e. one PTF group.
 *
 * With a plain OVER (PARTITION BY column) or OVER (ORDER BY column), the reduce processor's
 * group key is the partition key or the order by key itself.
 *
 * With OVER (PARTITION BY column1, ORDER BY column2), the reduce-shuffle group key combines
 * partition column1 and order by column2, so this method has to notice when the partition
 * changes and reset the group aggregations at that point.
 *
 * Before each call the reduce processor invokes setNextVectorBatchGroupStatus to say whether
 * the batch passed to this method is the last one for the group key. That flag decides
 * whether a batch is buffered or the group results are filled in and forwarded.
 */
@Override
public void process(Object row, int tag) throws HiveException {
  final VectorizedRowBatch vrb = (VectorizedRowBatch) row;

  // Key expressions: ORDER BY first, then the optional PARTITION BY ones.
  for (VectorExpression orderExpr : orderExpressions) {
    orderExpr.evaluate(vrb);
  }
  if (partitionExpressions != null) {
    for (VectorExpression partitionExpr : partitionExpressions) {
      partitionExpr.evaluate(vrb);
    }
  }

  if (isPartitionOrderBy) {
    // With ORDER BY keys present, a new group key does not imply a new partition, so the
    // PARTITION BY key is tracked explicitly.
    final boolean firstPartitionSeen = isFirstPartition;
    if (firstPartitionSeen || isPartitionChanged(vrb)) {
      isFirstPartition = false;
      setCurrentPartition(vrb);
      if (!firstPartitionSeen) {
        // A genuinely new partition: start the aggregations over.
        groupBatches.resetEvaluators();
      }
    }
  }

  if (!allEvaluatorsAreStreaming) {
    // Evaluate the aggregation functions over this batch of the group.
    groupBatches.evaluateGroupBatch(vrb, isLastGroupBatch);
    if (isLastGroupBatch) {
      // Last batch of the group: write the (non-streaming) aggregation values into the output
      // columns of every batch of the group and forward each batch to the next operator.
      groupBatches.fillGroupResultsAndForward(this, vrb);
    } else {
      // The group continues in a later batch. Keep this one (column swap into our buffers or
      // temporary storage) and wait for the rest.
      groupBatches.bufferGroupBatch(vrb);
      return;
    }
  } else {
    // Streaming evaluators need no buffering, so the batch is finished right away.
    groupBatches.evaluateStreamingGroupBatch(vrb, isLastGroupBatch);
    forward(vrb, null);
  }

  // PARTITION BY only: reset the evaluators once this point is reached.
  if (!isPartitionOrderBy) {
    groupBatches.resetEvaluators();
  }
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorReduceSinkEmptyKeyOperator, method process.
/**
 * Collects every row of a vectorized batch under the operator's empty key.
 *
 * Empty batches are counted and skipped. On the first non-empty batch the empty key is
 * initialized (after asserting that this operator really has an empty key). The value
 * expressions are then evaluated and, unless the value is empty as well, each row's value is
 * serialized before the key/value pair is collected.
 *
 * @param row the incoming batch; cast to VectorizedRowBatch
 * @param tag the tag passed on to initializeEmptyKey
 * @throws HiveException wrapping any exception raised while processing the batch
 */
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    final VectorizedRowBatch vrb = (VectorizedRowBatch) row;
    batchCounter++;

    if (vrb.size == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
      }
      return;
    }

    if (!isKeyInitialized) {
      isKeyInitialized = true;
      Preconditions.checkState(isEmptyKey);
      initializeEmptyKey(tag);
    }

    // Value expressions write their results into scratch columns.
    if (reduceSinkValueExpressions != null) {
      for (VectorExpression valueExpression : reduceSinkValueExpressions) {
        valueExpression.evaluate(vrb);
      }
    }

    final int rowCount = vrb.size;

    if (isEmptyValue) {
      // Neither key nor value carries data: emit the same pair once per row.
      for (int emitted = 0; emitted < rowCount; emitted++) {
        collect(keyWritable, valueBytesWritable);
      }
      return;
    }

    final boolean useSelected = vrb.selectedInUse;
    final int[] selectedRows = vrb.selected;
    for (int logical = 0; logical < rowCount; logical++) {
      // Map the logical position to the physical row when a selection vector is active.
      final int batchIndex = useSelected ? selectedRows[logical] : logical;

      valueLazyBinarySerializeWrite.reset();
      valueVectorSerializeRow.serializeWrite(vrb, batchIndex);
      valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());

      collect(keyWritable, valueBytesWritable);
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorReduceSinkObjectHashOperator, method process.
/**
 * Serializes and collects every row of a vectorized batch, assigning each row a hash code
 * built from its partition and bucket columns.
 *
 * Empty batches are counted and skipped. Key, value, bucket and partition expressions are
 * evaluated (in that order) before the rows are visited. Per row, the hash code is either a
 * random number (no partition columns) or the hash of the partition values; when bucket
 * columns exist it is multiplied by 31 and the bucket number is added. The key is serialized
 * unless it is empty, the value is serialized unless it is empty, and the pair is collected.
 *
 * @param row the incoming batch; cast to VectorizedRowBatch
 * @param tag the reduce tag; -1 means the tag byte is not appended to the key
 * @throws HiveException wrapping any exception raised while processing the batch
 */
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    final VectorizedRowBatch vrb = (VectorizedRowBatch) row;
    batchCounter++;

    if (vrb.size == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
      }
      return;
    }

    if (!isKeyInitialized) {
      isKeyInitialized = true;
      if (isEmptyKey) {
        initializeEmptyKey(tag);
      }
    }

    // All four expression groups write their results into scratch columns.
    if (reduceSinkKeyExpressions != null) {
      for (VectorExpression keyExpression : reduceSinkKeyExpressions) {
        keyExpression.evaluate(vrb);
      }
    }
    if (reduceSinkValueExpressions != null) {
      for (VectorExpression valueExpression : reduceSinkValueExpressions) {
        valueExpression.evaluate(vrb);
      }
    }
    if (reduceSinkBucketExpressions != null) {
      for (VectorExpression bucketExpression : reduceSinkBucketExpressions) {
        bucketExpression.evaluate(vrb);
      }
    }
    if (reduceSinkPartitionExpressions != null) {
      for (VectorExpression partitionExpression : reduceSinkPartitionExpressions) {
        partitionExpression.evaluate(vrb);
      }
    }

    final boolean useSelected = vrb.selectedInUse;
    final int[] selectedRows = vrb.selected;
    final int rowCount = vrb.size;

    for (int logical = 0; logical < rowCount; logical++) {
      // Map the logical position to the physical row when a selection vector is active.
      final int batchIndex;
      if (useSelected) {
        batchIndex = selectedRows[logical];
      } else {
        batchIndex = logical;
      }

      // Step 1: bucket number, only when bucket columns exist.
      final boolean hasBuckets = !isEmptyBuckets;
      int bucketNum = 0;
      if (hasBuckets) {
        bucketVectorExtractRow.extractRow(vrb, batchIndex, bucketFieldValues);
        bucketNum = ObjectInspectorUtils.getBucketNumber(bucketFieldValues, bucketObjectInspectors, numBuckets);
      }

      // Step 2: base hash, random without partition columns, otherwise from their values.
      final int baseHash;
      if (isEmptyPartitions) {
        baseHash = nonPartitionRandom.nextInt();
      } else {
        partitionVectorExtractRow.extractRow(vrb, batchIndex, partitionFieldValues);
        baseHash = ObjectInspectorUtils.getBucketHashCode(partitionFieldValues, partitionObjectInspectors);
      }

      // Step 3: fold the bucket number in when there is one.
      final int hashCode = hasBuckets ? baseHash * 31 + bucketNum : baseHash;

      if (!isEmptyKey) {
        keyBinarySortableSerializeWrite.reset();
        keyVectorSerializeRow.serializeWrite(vrb, batchIndex);

        final int keyLength = keyOutput.getLength();
        final boolean appendTagByte = tag != -1 && !reduceSkipTag;
        if (appendTagByte) {
          // Key bytes followed by one trailing tag byte.
          keyWritable.setSize(keyLength + 1);
          System.arraycopy(keyOutput.getData(), 0, keyWritable.get(), 0, keyLength);
          keyWritable.get()[keyLength] = reduceTagByte;
        } else {
          keyWritable.set(keyOutput.getData(), 0, keyLength);
        }
        keyWritable.setDistKeyLength(keyLength);
      }
      keyWritable.setHashCode(hashCode);

      if (!isEmptyValue) {
        valueLazyBinarySerializeWrite.reset();
        valueVectorSerializeRow.serializeWrite(vrb, batchIndex);
        valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
      }

      collect(keyWritable, valueBytesWritable);
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorMapJoinGenerateResultOperator, method performValueExpressions.
// ------------------------------------------------------------------------------------------------
/**
 * Evaluates the big table value expressions over the matched rows of a batch.
 *
 * The batch is temporarily made to look as if exactly the matched rows were selected, the
 * expressions are run, and the batch's original selection array and selectedInUse flag are
 * put back. batch.size is left at allMatchCount.
 *
 * @param batch the batch whose value expressions are evaluated
 * @param allMatchs batch indices of the matched rows, used as the temporary selection
 * @param allMatchCount number of valid entries in allMatchs
 */
protected void performValueExpressions(VectorizedRowBatch batch, int[] allMatchs, int allMatchCount) {
  // Remember the caller's selection state so it can be reinstated afterwards.
  final int[] originalSelected = batch.selected;
  final boolean originalSelectedInUse = batch.selectedInUse;

  // For the moment, pretend all matched rows are selected so the expressions see only them.
  batch.selected = allMatchs;
  batch.selectedInUse = true;
  batch.size = allMatchCount;

  for (VectorExpression valueExpression : bigTableValueExpressions) {
    valueExpression.evaluate(batch);
  }

  // The overflow batch may be used while generating results, so the selected array and the
  // real batch size are assigned later; only the saved selection state is restored here.
  batch.selectedInUse = originalSelectedInUse;
  batch.selected = originalSelected;
}
Aggregations