Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.
In the class SerDeEncodedDataReader, the method processOneSlice:
/** Unlike the other overload of processOneSlice, doesn't cache data. */
private boolean processOneSlice(Vectors diskData, boolean[] splitIncludes, int stripeIx,
    StripeData cacheData, long startTime) throws IOException {
  if (diskData == null) {
    // The other overload should have been used.
    throw new AssertionError();
  }
  // LlapIoImpl.LOG.debug("diskData " + diskData);
  logProcessOneSlice(stripeIx, diskData, cacheData);
  if (cacheData == null && diskData.getRowCount() == 0) {
    // Nothing to process.
    return true;
  }
  ColumnEncoding[] cacheEncodings = cacheData == null ? null : cacheData.getEncodings();
  LlapDataBuffer[][][] cacheBuffers = cacheData == null ? null : cacheData.getData();
  if (cacheData != null) {
    // Don't validate column count - no encodings for vectors.
    validateCacheAndDisk(cacheData, diskData.getRowCount(), -1, diskData);
  }
  SerDeStripeMetadata metadata = new SerDeStripeMetadata(stripeIx);
  metadata.setEncodings(Arrays.asList(cacheEncodings == null
      ? new ColumnEncoding[splitIncludes.length] : cacheEncodings));
  metadata.setRowCount(diskData.getRowCount());
  if (LlapIoImpl.LOG.isTraceEnabled()) {
    LlapIoImpl.LOG.trace("Derived stripe metadata for this split is " + metadata);
  }
  consumer.setStripeMetadata(metadata);
  OrcEncodedColumnBatch ecb = ECB_POOL.take();
  ecb.init(fileKey, metadata.getStripeIx(), OrcEncodedColumnBatch.ALL_RGS, writerIncludes.length);
  int vectorsIx = 0;
  for (int colIx = 0; colIx < writerIncludes.length; ++colIx) {
    if (!writerIncludes[colIx]) {
      continue;
    }
    if (splitIncludes[colIx]) {
      // Skip the 0-th column, since it won't have a vector after reading the text source.
      if (colIx != 0) {
        List<ColumnVector> vectors = diskData.getVectors(vectorsIx++);
        if (LlapIoImpl.LOG.isTraceEnabled()) {
          LlapIoImpl.LOG.trace("Processing vectors for column " + colIx + ": " + vectors);
        }
        ecb.initColumnWithVectors(colIx, vectors);
      } else {
        ecb.initColumn(0, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
      }
    } else {
      ecb.initColumn(colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
      processColumnCacheData(cacheBuffers, ecb, colIx);
    }
  }
  if (processStop()) {
    recordReaderTime(startTime);
    return false;
  }
  return sendEcbToConsumer(ecb, cacheData != null, null);
}
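The part of this method that is easy to misread is the interplay of the two include masks: writerIncludes selects every column the consumer needs, while splitIncludes marks the subset actually read from this split; anything included but not read from the split is served from cache. Below is a minimal, self-contained sketch of just that dispatch logic; the println calls are hypothetical stand-ins for the ecb and cache operations, and the sample masks are invented for illustration:

// Hypothetical illustration of the two-mask column dispatch above.
final class ColumnDispatchSketch {
  static void dispatch(boolean[] writerIncludes, boolean[] splitIncludes) {
    int vectorsIx = 0; // index into the vectors produced by the disk read
    for (int colIx = 0; colIx < writerIncludes.length; ++colIx) {
      if (!writerIncludes[colIx]) {
        continue; // column not needed at all
      }
      if (splitIncludes[colIx]) {
        // Column was read from this split; column 0 has no vector for text sources.
        if (colIx != 0) {
          System.out.println("column " + colIx + " <- disk vectors slot " + (vectorsIx++));
        } else {
          System.out.println("column 0 <- empty data streams");
        }
      } else {
        System.out.println("column " + colIx + " <- cached encoded buffers");
      }
    }
  }

  public static void main(String[] args) {
    dispatch(new boolean[] { true, true, false, true },
             new boolean[] { true, true, false, false });
  }
}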
Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.
In the class ReduceRecordSource, the method init:
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc,
    TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag,
    VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) throws Exception {
  this.vectorizedVertexNum = vectorizedVertexNum;
  ObjectInspector keyObjectInspector;
  this.reducer = reducer;
  this.vectorized = vectorized;
  this.keyTableDesc = keyTableDesc;
  if (reader instanceof KeyValueReader) {
    this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
  } else {
    this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
  }
  this.handleGroupKey = handleGroupKey;
  this.tag = tag;
  try {
    inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
    keyObjectInspector = inputKeyDeserializer.getObjectInspector();
    if (vectorized) {
      keyStructInspector = (StructObjectInspector) keyObjectInspector;
      firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
    }
    // We should initialize the SerDe with the TypeInfo when available.
    this.valueTableDesc = valueTableDesc;
    inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(
        valueTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
    valueObjectInspector = inputValueDeserializer.getObjectInspector();
    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    if (vectorized) {
      /* vectorization only works with struct object inspectors */
      valueStructInspectors = (StructObjectInspector) valueObjectInspector;
      final int totalColumns =
          firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
      valueStringWriters = new ArrayList<VectorExpressionWriter>(totalColumns);
      valueStringWriters.addAll(Arrays.asList(
          VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
      valueStringWriters.addAll(Arrays.asList(
          VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors)));
      rowObjectInspector =
          Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
      batch = batchContext.createVectorizedRowBatch();
      // Setup vectorized deserialization for the key and value.
      BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
      keyBinarySortableDeserializeToRow =
          new VectorDeserializeRow<BinarySortableDeserializeRead>(
              new BinarySortableDeserializeRead(
                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                  /* useExternalBuffer */ true,
                  binarySortableSerDe.getSortOrders()));
      keyBinarySortableDeserializeToRow.init(0);
      final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
      if (valuesSize > 0) {
        valueLazyBinaryDeserializeToRow =
            new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                new LazyBinaryDeserializeRead(
                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                    /* useExternalBuffer */ true));
        valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
        // Create data buffers for value bytes column vectors.
        for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
          ColumnVector colVector = batch.cols[i];
          if (colVector instanceof BytesColumnVector) {
            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
            bytesColumnVector.initBuffer();
          }
        }
      }
    } else {
      ois.add(keyObjectInspector);
      ois.add(valueObjectInspector);
      rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
          Utilities.reduceFieldNameList, ois);
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory.
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
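One detail worth calling out is the final loop, which calls initBuffer() on every BytesColumnVector among the value columns: a BytesColumnVector does not allocate a backing byte buffer on construction, and setVal copies data into that buffer, so it must be initialized before use. A minimal standalone sketch of the same pattern (the class name and sample values are invented; the vector APIs are the real ones from the Hive vector package):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BytesColumnVectorDemo {
  public static void main(String[] args) {
    // A two-column batch: one long column, one bytes column.
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    batch.cols[0] = new LongColumnVector();
    BytesColumnVector values = new BytesColumnVector();
    values.initBuffer(); // same call the reduce-side init performs per bytes column
    batch.cols[1] = values;

    byte[] v = "hello".getBytes(StandardCharsets.UTF_8);
    ((LongColumnVector) batch.cols[0]).vector[0] = 42L;
    values.setVal(0, v, 0, v.length); // copies the bytes into the shared buffer
    batch.size = 1;
  }
}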
Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.
In the class FakeVectorRowBatchFromLongIterables, the method produceNextBatch:
@Override
public VectorizedRowBatch produceNextBatch() {
  batch.size = 0;
  batch.selectedInUse = false;
  for (int i = 0; i < numCols; ++i) {
    ColumnVector col = batch.cols[i];
    col.noNulls = true;
    col.isRepeating = false;
  }
  while (!eof && batch.size < this.batchSize) {
    int r = batch.size;
    for (int i = 0; i < numCols; ++i) {
      Iterator<Long> it = iterators.get(i);
      if (!it.hasNext()) {
        eof = true;
        break;
      }
      LongColumnVector col = (LongColumnVector) batch.cols[i];
      Long value = it.next();
      if (null == value) {
        col.noNulls = false;
        col.isNull[r] = true;
      } else {
        col.vector[r] = value;
        col.isNull[r] = false;
      }
    }
    if (!eof) {
      batch.size += 1;
    }
  }
  return batch;
}
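The null handling in this test helper follows the general ColumnVector contract: a single null entry forces noNulls to false for the entire column, and isNull flags the individual row. A minimal sketch of the same convention without the iterator plumbing (the class name and input data are invented for illustration):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class LongColumnNullDemo {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(1);
    LongColumnVector col = new LongColumnVector();
    batch.cols[0] = col;

    Long[] input = { 1L, null, 3L };
    for (int r = 0; r < input.length; r++) {
      if (input[r] == null) {
        col.noNulls = false;  // column now contains at least one null
        col.isNull[r] = true; // flag this particular row
      } else {
        col.vector[r] = input[r];
        col.isNull[r] = false;
      }
    }
    batch.size = input.length;
    System.out.println("row 1 is null: " + col.isNull[1]);
  }
}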