Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in the Apache Hive project.
From the class VectorizedOrcSerde, method serialize.
/**
 * Serializes an entire {@link VectorizedRowBatch} into the reusable array of
 * ORC rows, then wraps that array in the shared Writable and returns it.
 *
 * @param obj       the batch to serialize; must be a VectorizedRowBatch
 * @param inspector the object inspector attached to every emitted row
 * @return the shared Writable wrapping the populated row array
 */
@Override
public Writable serialize(Object obj, ObjectInspector inspector) {
  VectorizedRowBatch batch = (VectorizedRowBatch) obj;
  try {
    for (int row = 0; row < batch.size; row++) {
      // Lazily allocate the OrcStruct holder for this slot and cache it.
      OrcStruct struct = orcStructArray[row];
      if (struct == null) {
        struct = new OrcStruct(batch.numCols);
        orcStructArray[row] = struct;
      }
      // Resolve the physical row index, honoring the selection vector when active.
      final int physicalRow = batch.selectedInUse ? batch.selected[row] : row;
      for (int proj = 0; proj < batch.projectionSize; proj++) {
        int colNum = batch.projectedColumns[proj];
        // Repeating vectors keep their single value at position 0.
        if (batch.cols[colNum].isRepeating) {
          valueWriters[proj].setValue(struct, batch.cols[colNum], 0);
        } else {
          valueWriters[proj].setValue(struct, batch.cols[colNum], physicalRow);
        }
      }
      OrcSerdeRow serdeRow = (OrcSerdeRow) orcRowArray[row];
      serdeRow.realRow = struct;
      serdeRow.inspector = inspector;
    }
  } catch (HiveException ex) {
    throw new RuntimeException(ex);
  }
  ow.set(orcRowArray);
  return ow;
}
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in the Apache Hive project.
From the class SparkReduceRecordHandler, method processVectors.
/**
 * Fills a vectorized batch with the value rows for the current key and hands
 * each full (or final partial) batch to the reducer.
 *
 * @param values iterator over the serialized value rows sharing the current key
 * @param tag    identifies which input (and therefore which batch/inspector) to use
 * @return true if it is not done and can take more inputs
 * @throws HiveException if deserializing a row or processing a batch fails
 */
private <E> boolean processVectors(Iterator<E> values, byte tag) throws HiveException {
  VectorizedRowBatch batch = batches[tag];
  batch.reset();
  buffer.reset();
  // The key is identical for every row of this group: deserialize it once
  // into row 0 and mark each key column as repeating.
  VectorizedBatchUtil.addRowToBatchFrom(keyObject, keyStructInspector, 0, 0, batch, buffer);
  for (int col = 0; col < keysColumnOffset; col++) {
    VectorizedBatchUtil.setRepeatingColumn(batch, col);
  }
  int filled = 0;
  try {
    while (values.hasNext()) {
      // Deserialize the next value row into the columns after the key columns.
      BytesWritable valueWritable = (BytesWritable) values.next();
      Object valueObj = deserializeValue(valueWritable, tag);
      VectorizedBatchUtil.addRowToBatchFrom(valueObj, valueStructInspectors[tag], filled, keysColumnOffset, batch, buffer);
      filled++;
      if (filled >= BATCH_SIZE) {
        // Batch is full: flush it to the reducer and start refilling.
        VectorizedBatchUtil.setBatchSize(batch, filled);
        reducer.process(batch, tag);
        filled = 0;
        if (isLogInfoEnabled) {
          logMemoryInfo();
        }
      }
    }
    // Flush the final, partially filled batch if any rows remain.
    if (filled > 0) {
      VectorizedBatchUtil.setBatchSize(batch, filled);
      reducer.process(batch, tag);
    }
    if (isLogInfoEnabled) {
      logMemoryInfo();
    }
  } catch (Exception e) {
    // Best effort: attach the batch contents to the error, but never let the
    // diagnostic rendering itself hide the original failure.
    String rowString;
    try {
      rowString = batch.toString();
    } catch (Exception e2) {
      rowString = "[Error getting row data with exception " + StringUtils.stringifyException(e2) + " ]";
    }
    throw new HiveException("Error while processing vector batch (tag=" + tag + ") " + rowString, e);
  }
  // give me more
  return true;
}
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in the Apache Hive project.
From the class TestVectorGenericDateExpressions, method testDateAddColCol.
/**
 * Exercises column-plus-column date add/subtract: first with no nulls, then
 * with random nulls in the date column, then with nulls in both columns.
 *
 * @param colType1   the vector type the date operand is cast to
 * @param isPositive true to validate date addition, false for subtraction
 */
private void testDateAddColCol(VectorExpression.Type colType1, boolean isPositive) {
  LongColumnVector dates = newRandomLongColumnVector(10000, size);
  LongColumnVector days = newRandomLongColumnVector(1000, size);
  ColumnVector dateCol = castTo(dates, colType1);
  LongColumnVector result = new LongColumnVector(size);

  VectorizedRowBatch batch = new VectorizedRowBatch(3, size);
  batch.cols[0] = dateCol;
  batch.cols[1] = days;
  batch.cols[2] = result;

  // No nulls anywhere.
  validateDateAdd(batch, dates, days, colType1, isPositive);

  // Random nulls in the date column (re-cast so the batch sees them).
  TestVectorizedRowBatch.addRandomNulls(dates);
  batch.cols[0] = castTo(dates, colType1);
  validateDateAdd(batch, dates, days, colType1, isPositive);

  // Random nulls in the days column as well.
  TestVectorizedRowBatch.addRandomNulls(days);
  batch.cols[1] = days;
  validateDateAdd(batch, dates, days, colType1, isPositive);
}
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in the Apache Hive project.
From the class TestVectorGenericDateExpressions, method testDateAddColScalar.
/**
 * Exercises column-plus-scalar date add/subtract: first with no nulls, then
 * with random nulls injected into the date column.
 *
 * @param colType1   the vector type the date operand is cast to
 * @param isPositive true to validate date addition, false for subtraction
 */
private void testDateAddColScalar(VectorExpression.Type colType1, boolean isPositive) {
  LongColumnVector dates = newRandomLongColumnVector(10000, size);
  ColumnVector dateCol = castTo(dates, colType1);
  long scalarDays = newRandom(1000);
  LongColumnVector result = new LongColumnVector(size);

  VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
  batch.cols[0] = dateCol;
  batch.cols[1] = result;

  // No nulls.
  validateDateAdd(batch, colType1, scalarDays, isPositive, dates);

  // Random nulls in the (already cast) date column.
  TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
  validateDateAdd(batch, colType1, scalarDays, isPositive, dates);
}
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in the Apache Hive project.
From the class TestVectorGenericDateExpressions, method testDateAddScalarCol.
/**
 * Exercises scalar-plus-column date add/subtract: first with no nulls, then
 * with random nulls injected into the days column.
 *
 * @param colType1   the vector type of the scalar date operand
 * @param isPositive true to validate date addition, false for subtraction
 */
private void testDateAddScalarCol(VectorExpression.Type colType1, boolean isPositive) {
  LongColumnVector days = newRandomLongColumnVector(10000, size);
  long scalarDate = newRandom(1000);
  LongColumnVector result = new LongColumnVector(size);

  VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
  batch.cols[0] = days;
  batch.cols[1] = result;

  // No nulls.
  validateDateAdd(batch, scalarDate, days, colType1, isPositive);

  // Random nulls in the days column (re-assigned for symmetry with the
  // other tests; the batch already references this vector).
  TestVectorizedRowBatch.addRandomNulls(days);
  batch.cols[0] = days;
  validateDateAdd(batch, scalarDate, days, colType1, isPositive);
}
Aggregations