Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.
The class Vectorizer, method debugDisplayAllMaps.
public void debugDisplayAllMaps(BaseWork work) {
  VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx();
  String[] allColumnNames = vectorizedRowBatchCtx.getRowColumnNames();
  TypeInfo[] columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
  int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount();
  String[] scratchColumnTypeNames = vectorizedRowBatchCtx.getScratchColumnTypeNames();
  LOG.debug("debugDisplayAllMaps allColumnNames " + Arrays.toString(allColumnNames));
  LOG.debug("debugDisplayAllMaps columnTypeInfos " + Arrays.toString(columnTypeInfos));
  LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount);
  LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames));
}
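For context, a minimal sketch of how such a dump is typically guarded at a call site; the debug-level check and the work variable in scope are assumptions for illustration:

  if (LOG.isDebugEnabled() && work.getVectorizedRowBatchCtx() != null) {
    // Only walk and print the vectorization maps when debug logging is enabled.
    debugDisplayAllMaps(work);
  }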
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.
The class VectorDeserializeOrcWriter, method createVrbCtx.
private static VectorizedRowBatchCtx createVrbCtx(StructObjectInspector oi, final Properties tblProps,
    final Configuration conf) throws IOException {
  final boolean useDecimal64ColumnVectors = HiveConf.getVar(conf,
      ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
  final String serde = tblProps.getProperty(serdeConstants.SERIALIZATION_LIB);
  final String inputFormat = tblProps.getProperty(hive_metastoreConstants.FILE_INPUT_FORMAT);
  final boolean isTextFormat = inputFormat != null && inputFormat.equals(TextInputFormat.class.getName())
      && serde != null && serde.equals(LazySimpleSerDe.class.getName());
  List<DataTypePhysicalVariation> dataTypePhysicalVariations = new ArrayList<>();
  if (isTextFormat) {
    StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
    int dataColumnCount = structTypeInfo.getAllStructFieldTypeInfos().size();
    for (int i = 0; i < dataColumnCount; i++) {
      DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE;
      if (useDecimal64ColumnVectors) {
        TypeInfo typeInfo = structTypeInfo.getAllStructFieldTypeInfos().get(i);
        if (typeInfo instanceof DecimalTypeInfo) {
          DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
          if (HiveDecimalWritable.isPrecisionDecimal64(decimalTypeInfo.precision())) {
            dataTypePhysicalVariation = DataTypePhysicalVariation.DECIMAL_64;
          }
        }
      }
      dataTypePhysicalVariations.add(dataTypePhysicalVariation);
    }
  }
  VectorizedRowBatchCtx vrbCtx = new VectorizedRowBatchCtx();
  try {
    vrbCtx.init(oi, new String[0]);
  } catch (HiveException e) {
    throw new IOException(e);
  }
  if (!dataTypePhysicalVariations.isEmpty()) {
    vrbCtx.setRowDataTypePhysicalVariations(dataTypePhysicalVariations.toArray(new DataTypePhysicalVariation[0]));
  }
  return vrbCtx;
}
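A minimal sketch of how the context built above is typically consumed; the variable names are placeholders, not part of the original method:

  // Allocate a batch shaped by the row schema; decimal columns flagged as
  // DECIMAL_64 should be backed by Decimal64ColumnVector rather than DecimalColumnVector.
  VectorizedRowBatchCtx vrbCtx = createVrbCtx(oi, tblProps, conf);
  VectorizedRowBatch sourceBatch = vrbCtx.createVectorizedRowBatch();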
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.
The class KryoBench, method mockMapWork.
public static MapWork mockMapWork(String tableName, int partitions, ObjectInspector objectInspector) throws Exception {
  Path root = new Path("/warehouse", tableName);
  String[] partPath = new String[partitions];
  StringBuilder buffer = new StringBuilder();
  for (int p = 0; p < partitions; ++p) {
    partPath[p] = new Path(root, "p=" + p).toString();
    if (p != 0) {
      buffer.append(',');
    }
    buffer.append(partPath[p]);
  }
  StringBuilder columnIds = new StringBuilder();
  StringBuilder columnNames = new StringBuilder();
  StringBuilder columnTypes = new StringBuilder();
  StructObjectInspector structOI = (StructObjectInspector) objectInspector;
  List<? extends StructField> fields = structOI.getAllStructFieldRefs();
  int numCols = fields.size();
  for (int i = 0; i < numCols; ++i) {
    if (i != 0) {
      columnIds.append(',');
      columnNames.append(',');
      columnTypes.append(',');
    }
    columnIds.append(i);
    columnNames.append(fields.get(i).getFieldName());
    columnTypes.append(fields.get(i).getFieldObjectInspector().getTypeName());
  }
  Properties tblProps = new Properties();
  tblProps.put("name", tableName);
  tblProps.put("serialization.lib", OrcSerde.class.getName());
  tblProps.put("columns", columnNames.toString());
  tblProps.put("columns.types", columnTypes.toString());
  TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class, tblProps);
  MapWork mapWork = new MapWork();
  mapWork.setVectorMode(true);
  VectorizedRowBatchCtx vectorizedRowBatchCtx = new VectorizedRowBatchCtx();
  vectorizedRowBatchCtx.init(structOI, new String[0]);
  mapWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
  mapWork.setUseBucketizedHiveInputFormat(false);
  Map<Path, List<String>> aliasMap = new LinkedHashMap<>();
  List<String> aliases = new ArrayList<String>();
  aliases.add(tableName);
  LinkedHashMap<Path, PartitionDesc> partMap = new LinkedHashMap<>();
  for (int p = 0; p < partitions; ++p) {
    Path path = new Path(partPath[p]);
    aliasMap.put(path, aliases);
    LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
    PartitionDesc part = new PartitionDesc(tbl, partSpec);
    part.setVectorPartitionDesc(
        VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false, null));
    partMap.put(path, part);
  }
  mapWork.setPathToAliases(aliasMap);
  mapWork.setPathToPartitionInfo(partMap);
  return mapWork;
}
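A rough sketch of the serialize/deserialize round trip a Kryo benchmark like this would drive with the mock plan; the table name, partition count, the inspector variable, and the use of SerializationUtilities' plan helpers are assumptions for illustration:

  MapWork work = mockMapWork("bench_table", 8, inspector);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  // Serialize the plan with Hive's Kryo-based plan serialization, then read it back.
  SerializationUtilities.serializePlan(work, out);
  MapWork copy = SerializationUtilities.deserializePlan(
      new ByteArrayInputStream(out.toByteArray()), MapWork.class);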
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.
The class ColumnarStorageBench, method initialVectorizedRowBatchCtx.
private void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException {
  MapWork mapWork = new MapWork();
  VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
  rbCtx.init(createStructObjectInspector(conf), new String[0]);
  mapWork.setVectorMode(true);
  mapWork.setVectorizedRowBatchCtx(rbCtx);
  Utilities.setMapWork(conf, mapWork);
}
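For illustration, a hedged sketch of how a benchmark read path could recover the context that was stored in the Configuration above; the variable names are placeholders:

  // Utilities.setMapWork stores the plan in the Configuration, so a reader
  // can recover the batch context and allocate batches from it.
  MapWork mapWork = Utilities.getMapWork(conf);
  VectorizedRowBatchCtx rbCtx = mapWork.getVectorizedRowBatchCtx();
  VectorizedRowBatch batch = rbCtx.createVectorizedRowBatch();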
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.
The class AggregationBase, method doVectorTest.
protected static boolean doVectorTest(String aggregationName, TypeInfo typeInfo, GenericUDAFEvaluator evaluator,
    TypeInfo outputTypeInfo, GenericUDAFEvaluator.Mode udafEvaluatorMode, int maxKeyCount, List<String> columns,
    String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations,
    List<ExprNodeDesc> parameterList, VectorRandomBatchSource batchSource, Object[] results) throws Exception {
  HiveConf hiveConf = new HiveConf();
  VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos),
      Arrays.asList(dataTypePhysicalVariations), hiveConf);
  ImmutablePair<VectorAggregationDesc, String> pair = Vectorizer.getVectorAggregationDesc(aggregationName,
      parameterList, evaluator, outputTypeInfo, udafEvaluatorMode, vectorizationContext);
  VectorAggregationDesc vecAggrDesc = pair.left;
  if (vecAggrDesc == null) {
    Assert.fail("No vector aggregation expression found for aggregationName " + aggregationName
        + " udafEvaluatorMode " + udafEvaluatorMode + " parameterList " + parameterList
        + " outputTypeInfo " + outputTypeInfo);
  }
  Class<? extends VectorAggregateExpression> vecAggrClass = vecAggrDesc.getVecAggrClass();
  Constructor<? extends VectorAggregateExpression> ctor = null;
  try {
    ctor = vecAggrClass.getConstructor(VectorAggregationDesc.class);
  } catch (Exception e) {
    throw new HiveException("Constructor " + vecAggrClass.getSimpleName() + "(VectorAggregationDesc) not available");
  }
  VectorAggregateExpression vecAggrExpr = null;
  try {
    vecAggrExpr = ctor.newInstance(vecAggrDesc);
  } catch (Exception e) {
    throw new HiveException("Failed to create " + vecAggrClass.getSimpleName() + "(VectorAggregationDesc) object ", e);
  }
  VectorExpression.doTransientInit(vecAggrExpr.getInputExpression(), hiveConf);
  // System.out.println("*VECTOR AGGREGATION EXPRESSION* " + vecAggrExpr.getClass().getSimpleName());
  /*
  System.out.println(
      "*DEBUG* typeInfo " + typeInfo.toString() +
      " aggregationTestMode VECTOR_MODE" +
      " vecAggrExpr " + vecAggrExpr.getClass().getSimpleName());
  */
  VectorRandomRowSource rowSource = batchSource.getRowSource();
  VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(columnNames, rowSource.typeInfos(),
      rowSource.dataTypePhysicalVariations(),
      /* dataColumnNums */ null,
      /* partitionColumnCount */ 0,
      /* virtualColumnCount */ 0,
      /* neededVirtualColumns */ null,
      vectorizationContext.getScratchColumnTypeNames(),
      vectorizationContext.getScratchDataTypePhysicalVariations());
  VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
  // Last entry is for a NULL key.
  VectorAggregationBufferRow[] vectorAggregationBufferRows = new VectorAggregationBufferRow[maxKeyCount + 1];
  VectorAggregationBufferRow[] batchBufferRows;
  batchSource.resetBatchIteration();
  int rowIndex = 0;
  while (true) {
    if (!batchSource.fillNextBatch(batch)) {
      break;
    }
    LongColumnVector keyLongColVector = (LongColumnVector) batch.cols[0];
    batchBufferRows = new VectorAggregationBufferRow[VectorizedRowBatch.DEFAULT_SIZE];
    final int size = batch.size;
    boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    for (int logical = 0; logical < size; logical++) {
      final int batchIndex = (selectedInUse ? selected[logical] : logical);
      final int keyAdjustedBatchIndex;
      if (keyLongColVector.isRepeating) {
        keyAdjustedBatchIndex = 0;
      } else {
        keyAdjustedBatchIndex = batchIndex;
      }
      final short key;
      if (keyLongColVector.noNulls || !keyLongColVector.isNull[keyAdjustedBatchIndex]) {
        key = (short) keyLongColVector.vector[keyAdjustedBatchIndex];
      } else {
        key = (short) maxKeyCount;
      }
      VectorAggregationBufferRow bufferRow = vectorAggregationBufferRows[key];
      if (bufferRow == null) {
        VectorAggregateExpression.AggregationBuffer aggregationBuffer = vecAggrExpr.getNewAggregationBuffer();
        aggregationBuffer.reset();
        VectorAggregateExpression.AggregationBuffer[] aggregationBuffers =
            new VectorAggregateExpression.AggregationBuffer[] { aggregationBuffer };
        bufferRow = new VectorAggregationBufferRow(aggregationBuffers);
        vectorAggregationBufferRows[key] = bufferRow;
      }
      batchBufferRows[logical] = bufferRow;
    }
    vecAggrExpr.aggregateInputSelection(batchBufferRows, 0, batch);
    rowIndex += batch.size;
  }
  String[] outputColumnNames = new String[] { "output" };
  TypeInfo[] outputTypeInfos = new TypeInfo[] { outputTypeInfo };
  VectorizedRowBatchCtx outputBatchContext = new VectorizedRowBatchCtx(outputColumnNames, outputTypeInfos,
      new DataTypePhysicalVariation[] { vecAggrExpr.getOutputDataTypePhysicalVariation() },
      /* dataColumnNums */ null,
      /* partitionColumnCount */ 0,
      /* virtualColumnCount */ 0,
      /* neededVirtualColumns */ null,
      new String[0], new DataTypePhysicalVariation[0]);
  VectorizedRowBatch outputBatch = outputBatchContext.createVectorizedRowBatch();
  short[] keys = new short[VectorizedRowBatch.DEFAULT_SIZE];
  VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
  resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { 0 });
  Object[] scratchRow = new Object[1];
  for (short key = 0; key < maxKeyCount + 1; key++) {
    VectorAggregationBufferRow vectorAggregationBufferRow = vectorAggregationBufferRows[key];
    if (vectorAggregationBufferRow != null) {
      if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
        extractResultObjects(outputBatch, keys, resultVectorExtractRow, outputTypeInfo, scratchRow, results);
        outputBatch.reset();
      }
      keys[outputBatch.size] = key;
      VectorAggregateExpression.AggregationBuffer aggregationBuffer =
          vectorAggregationBufferRow.getAggregationBuffer(0);
      vecAggrExpr.assignRowColumn(outputBatch, outputBatch.size++, 0, aggregationBuffer);
    }
  }
  if (outputBatch.size > 0) {
    extractResultObjects(outputBatch, keys, resultVectorExtractRow, outputTypeInfo, scratchRow, results);
  }
  return true;
}
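A hedged sketch of how a caller that has already prepared the inputs might consume the populated results; the rowModeResults baseline is a hypothetical, not part of this class:

  Object[] vectorResults = new Object[maxKeyCount + 1];
  doVectorTest(aggregationName, typeInfo, evaluator, outputTypeInfo, udafEvaluatorMode, maxKeyCount,
      columns, columnNames, typeInfos, dataTypePhysicalVariations, parameterList, batchSource, vectorResults);
  for (int key = 0; key < vectorResults.length; key++) {
    // rowModeResults is a hypothetical row-mode baseline computed separately for the same input.
    Assert.assertEquals("mismatch for key " + key, rowModeResults[key], vectorResults[key]);
  }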