Use of org.apache.hadoop.hive.ql.plan.VectorPartitionDesc in the Apache Hive project.
Class VectorMapOperator, method setupPartitionContextVars:
/*
 * Set up the context for reading from the next partition file.
 */
private void setupPartitionContextVars(String nominalPath) throws HiveException {

  currentVectorPartContext = fileToPartitionContextMap.get(nominalPath);
  if (currentVectorPartContext == null) {
    return;
  }

  PartitionDesc partDesc = currentVectorPartContext.getPartDesc();
  VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
  currentReadType = vectorPartDesc.getVectorMapOperatorReadType();

  /*
   * Set up for the 3 different kinds of vectorized reading supported:
   *
   * 1) Read the Vectorized Input File Format, which returns a VectorizedRowBatch as the row.
   *
   * 2) Read using VectorDeserializeRow to deserialize each row into the VectorizedRowBatch.
   *
   * 3) Read using the regular partition deserializer to get the row object, then assign
   *    the row object into the VectorizedRowBatch with VectorAssignRow.
   */
  if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {

    /*
     * The Vectorized Input File Format reader is responsible for setting the partition
     * column values, resetting and filling in the batch, etc.
     */

    /*
     * Clear all the reading variables.
     */
    currentDataColumnCount = 0;

    currentDeserializeRead = null;
    currentVectorDeserializeRow = null;

    currentPartDeserializer = null;
    currentPartRawRowObjectInspector = null;
    currentVectorAssign = null;

  } else {

    /*
     * We will get "regular" single rows from the Input File Format reader that we will
     * need to {vector|row} deserialize.
     */
    Preconditions.checkState(
        currentReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE ||
        currentReadType == VectorMapOperatorReadType.ROW_DESERIALIZE);

    /*
     * Clear out any rows in the batch from the previous partition, since we are going to
     * change the repeating partition column values.
     */
    if (!flushDeserializerBatch()) {
      // Operator tree is now done.
      return;
    }

    /*
     * For this particular file, how many columns will we actually read?
     */
    currentDataColumnCount = currentVectorPartContext.getReaderDataColumnCount();

    if (currentDataColumnCount < dataColumnCount) {
      /*
       * Default any additional data columns to NULL once for the file (if they are present).
       */
      for (int i = currentDataColumnCount; i < dataColumnCount; i++) {
        ColumnVector colVector = deserializerBatch.cols[i];
        if (colVector != null) {
          colVector.isNull[0] = true;
          colVector.noNulls = false;
          colVector.isRepeating = true;
        }
      }
    }

    if (batchContext.getPartitionColumnCount() > 0) {
      /*
       * The partition columns are set once for the partition and are marked repeating.
       */
      VectorizedRowBatchCtx.getPartitionValues(batchContext, partDesc, partitionValues);
      batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues);
    }

    if (hasRowIdentifier) {
      // No ACID in this code path -- set ROW__ID to NULL.
      setRowIdentiferToNull(deserializerBatch);
    }

    /*
     * Set or clear the rest of the reading variables based on {vector|row} deserialization.
     */
    switch (currentReadType) {
    case VECTOR_DESERIALIZE:
      {
        VectorDeserializePartitionContext vectorDeserPartContext =
            (VectorDeserializePartitionContext) currentVectorPartContext;

        // Set ours.
        currentDeserializeRead = vectorDeserPartContext.getDeserializeRead();
        currentVectorDeserializeRow = vectorDeserPartContext.getVectorDeserializeRow();

        // Clear the others.
        currentPartDeserializer = null;
        currentPartRawRowObjectInspector = null;
        currentVectorAssign = null;
      }
      break;

    case ROW_DESERIALIZE:
      {
        RowDeserializePartitionContext rowDeserPartContext =
            (RowDeserializePartitionContext) currentVectorPartContext;

        // Clear the others.
        currentDeserializeRead = null;
        currentVectorDeserializeRow = null;

        // Set ours.
        currentPartDeserializer = rowDeserPartContext.getPartDeserializer();
        currentPartRawRowObjectInspector = rowDeserPartContext.getPartRawRowObjectInspector();
        currentVectorAssign = rowDeserPartContext.getVectorAssign();
      }
      break;

    default:
      throw new RuntimeException("Unexpected VectorMapOperator read type " +
          currentReadType.name());
    }
  }
}
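The loop that defaults the extra data columns to NULL relies on the repeating-column convention of VectorizedRowBatch: once isRepeating is true, consumers consult only entry 0 of the vector, so one assignment makes every row in the batch read as NULL. Below is a minimal, self-contained sketch of that idiom; the batch construction and the choice of a long column are illustrative, not taken from VectorMapOperator.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class RepeatingNullSketch {
  public static void main(String[] args) {
    // One-column batch; the column type (long) is arbitrary for this illustration.
    VectorizedRowBatch batch = new VectorizedRowBatch(1);
    LongColumnVector col = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    batch.cols[0] = col;
    batch.size = VectorizedRowBatch.DEFAULT_SIZE;

    // The same three assignments as in setupPartitionContextVars: with isRepeating
    // set, readers look only at entry 0, so every row in the batch is NULL.
    col.isNull[0] = true;
    col.noNulls = false;
    col.isRepeating = true;
  }
}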
Use of org.apache.hadoop.hive.ql.plan.VectorPartitionDesc in the Apache Hive project.
Class VectorMapOperator, method createAndInitPartitionContext:
public VectorPartitionContext createAndInitPartitionContext(PartitionDesc partDesc,
    Configuration hconf) throws SerDeException, Exception {

  VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
  if (vectorPartDesc == null) {
    return null;
  }
  VectorPartitionContext vectorPartitionContext;
  VectorMapOperatorReadType vectorMapOperatorReadType =
      vectorPartDesc.getVectorMapOperatorReadType();

  if (vectorMapOperatorReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE ||
      vectorMapOperatorReadType == VectorMapOperatorReadType.ROW_DESERIALIZE) {
    // Verify hive.exec.schema.evolution is true or we have an ACID table so we are
    // producing the table schema from ORC. The Vectorizer class assures this.
    boolean isAcid =
        AcidUtils.isTablePropertyTransactional(partDesc.getTableDesc().getProperties());
    Preconditions.checkState(Utilities.isSchemaEvolutionEnabled(hconf, isAcid));
  }

  switch (vectorMapOperatorReadType) {
  case VECTORIZED_INPUT_FILE_FORMAT:
    vectorPartitionContext = new VectorizedInputFileFormatPartitionContext(partDesc);
    break;

  case VECTOR_DESERIALIZE:
    vectorPartitionContext = new VectorDeserializePartitionContext(partDesc);
    break;

  case ROW_DESERIALIZE:
    vectorPartitionContext = new RowDeserializePartitionContext(partDesc);
    break;

  default:
    throw new RuntimeException("Unexpected vector MapOperator read type " +
        vectorMapOperatorReadType.name());
  }
  vectorPartitionContext.init(hconf);

  return vectorPartitionContext;
}
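createAndInitPartitionContext is the producer side of the fileToPartitionContextMap that setupPartitionContextVars reads above. The following is a hedged sketch of how the per-partition contexts might be cached by path; the helper name, the pathToPartitionInfo parameter, and the surrounding loop are assumptions for illustration, not the actual Hive code.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

// Hypothetical helper (not in VectorMapOperator): build the path -> context map
// that setupPartitionContextVars later queries via fileToPartitionContextMap.get(nominalPath).
private Map<String, VectorPartitionContext> buildFileToPartitionContextMap(
    Map<Path, PartitionDesc> pathToPartitionInfo, Configuration hconf) throws Exception {
  Map<String, VectorPartitionContext> map = new HashMap<String, VectorPartitionContext>();
  for (Map.Entry<Path, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
    VectorPartitionContext vectorPartitionContext =
        createAndInitPartitionContext(entry.getValue(), hconf);
    // A null context means the partition has no vectorization plan; skip it.
    if (vectorPartitionContext != null) {
      // Key by the file path string so each split can look up its partition's context.
      map.put(entry.getKey().toString(), vectorPartitionContext);
    }
  }
  return map;
}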