use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.
the class TestVectorizedORCReader method checkVectorizedReader.
/**
 * Verifies that the vectorized ORC reader returns the same data as the row-by-row ORC reader.
 *
 * <p>Two readers are opened on {@code testFilePath}. Every batch produced by the vectorized
 * reader is compared, value by value, with the rows produced by the row reader, and the row
 * numbers reported by both readers are checked to advance in lock step. After each batch the
 * expected {@code isRepeating} and {@code noNulls} flags of the ten columns are asserted.
 *
 * @throws Exception if reading fails or a reader cannot be closed
 */
private void checkVectorizedReader() throws Exception {
  Reader vreader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
  Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
  RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
  try {
    RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
    try {
      VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
      OrcStruct row = null;
      long lastRowNumber = -1;
      // Check Vectorized ORC reader against ORC row reader
      while (vrr.nextBatch(batch)) {
        // Each batch must start right after the last row consumed from the row reader.
        Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber());
        for (int i = 0; i < batch.size; i++) {
          Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber() + i);
          lastRowNumber = rr.getRowNumber();
          row = (OrcStruct) rr.next(row);
          for (int j = 0; j < batch.cols.length; j++) {
            Object a = (row.getFieldValue(j));
            ColumnVector cv = batch.cols[j];
            // if the value is repeating, use row 0
            int rowId = cv.isRepeating ? 0 : i;
            // make sure the null flag agrees
            if (a == null) {
              Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
            } else if (a instanceof BooleanWritable) {
              // Boolean values are stored as 1's and 0's, so convert and compare
              Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
              long b = ((LongColumnVector) cv).vector[rowId];
              Assert.assertEquals(temp.toString(), Long.toString(b));
            } else if (a instanceof TimestampWritableV2) {
              // Rebuild a Timestamp from the vector's scratch value, then compare
              TimestampWritableV2 t = ((TimestampWritableV2) a);
              TimestampColumnVector tcv = ((TimestampColumnVector) cv);
              java.sql.Timestamp ts = tcv.asScratchTimestamp(rowId);
              Assert.assertEquals(t.getTimestamp(), Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos()));
            } else if (a instanceof DateWritableV2) {
              // Dates are stored as long, so convert and compare
              DateWritableV2 adt = (DateWritableV2) a;
              long b = ((LongColumnVector) cv).vector[rowId];
              Assert.assertEquals(adt.get().toEpochMilli(), DateWritableV2.daysToMillis((int) b));
            } else if (a instanceof HiveDecimalWritable) {
              // The decimal vector holds HiveDecimalWritable values, so compare directly.
              // Index with rowId (not i) so a repeating column is read from slot 0,
              // exactly like every other column type in this method.
              HiveDecimalWritable dec = (HiveDecimalWritable) a;
              HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[rowId];
              Assert.assertEquals(dec, b);
            } else if (a instanceof DoubleWritable) {
              double b = ((DoubleColumnVector) cv).vector[rowId];
              assertEquals(a.toString(), Double.toString(b));
            } else if (a instanceof Text) {
              BytesColumnVector bcv = (BytesColumnVector) cv;
              Text b = new Text();
              b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
              assertEquals(a, b);
            } else if (a instanceof IntWritable || a instanceof LongWritable || a instanceof ByteWritable || a instanceof ShortWritable) {
              assertEquals(a.toString(), Long.toString(((LongColumnVector) cv).vector[rowId]));
            } else {
              // Unknown writable type: fail and report the class name.
              assertEquals("huh", a.getClass().getName());
            }
          }
        }
        // Check repeating
        Assert.assertEquals(false, batch.cols[0].isRepeating);
        Assert.assertEquals(false, batch.cols[1].isRepeating);
        Assert.assertEquals(false, batch.cols[2].isRepeating);
        Assert.assertEquals(true, batch.cols[3].isRepeating);
        Assert.assertEquals(false, batch.cols[4].isRepeating);
        Assert.assertEquals(false, batch.cols[5].isRepeating);
        Assert.assertEquals(false, batch.cols[6].isRepeating);
        Assert.assertEquals(false, batch.cols[7].isRepeating);
        Assert.assertEquals(false, batch.cols[8].isRepeating);
        Assert.assertEquals(false, batch.cols[9].isRepeating);
        // Check non null
        Assert.assertEquals(false, batch.cols[0].noNulls);
        Assert.assertEquals(false, batch.cols[1].noNulls);
        Assert.assertEquals(true, batch.cols[2].noNulls);
        Assert.assertEquals(true, batch.cols[3].noNulls);
        Assert.assertEquals(false, batch.cols[4].noNulls);
        Assert.assertEquals(false, batch.cols[5].noNulls);
        Assert.assertEquals(false, batch.cols[6].noNulls);
        Assert.assertEquals(false, batch.cols[7].noNulls);
        Assert.assertEquals(false, batch.cols[8].noNulls);
        Assert.assertEquals(false, batch.cols[9].noNulls);
      }
      // The row reader must be exhausted as well once the vectorized reader is done.
      Assert.assertEquals(false, rr.nextBatch(batch));
    } finally {
      rr.close();
    }
  } finally {
    vrr.close();
  }
}
use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.
the class VectorizedColumnReaderTestBase method decimalRead.
/**
 * Reads the single {@code decimal(5,2)} column {@code value} through the vectorized Parquet
 * reader and checks every value against {@code getDecimal(isDictionaryEncoding, position)}.
 *
 * <p>The test also asserts that the column reports no nulls and that exactly
 * {@code nElements} values were read in total.
 *
 * @param isDictionaryEncoding forwarded to {@code getDecimal} to pick the expected values
 * @throws Exception if the reader cannot be created, read, or closed
 */
protected void decimalRead(boolean isDictionaryEncoding) throws Exception {
  Configuration readerConf = new Configuration();
  readerConf.set(IOConstants.COLUMNS, "value");
  readerConf.set(IOConstants.COLUMNS_TYPES, "decimal(5,2)");
  readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader = createTestParquetReader("message hive_schema { required value (DECIMAL(5,2));}", readerConf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      DecimalColumnVector vector = (DecimalColumnVector) previous.cols[0];
      assertTrue(vector.noNulls);
      // Walk only the rows that belong to this batch (previous.size), not the full capacity
      // of the backing array: slots past the batch size may hold values from earlier batches.
      for (int i = 0; i < previous.size; i++) {
        if (c == nElements) {
          break;
        }
        // A repeating vector keeps its single value in slot 0.
        int rowId = vector.isRepeating ? 0 : i;
        assertEquals("Check failed at pos " + c, getDecimal(isDictionaryEncoding, c), vector.vector[rowId].getHiveDecimal());
        assertFalse(vector.isNull[rowId]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.
the class StructColumnInList method evaluate.
/**
 * Builds one serialized key per row from the struct's field columns, stores it in the scratch
 * bytes column, and then delegates to the superclass to look those keys up in the IN list.
 *
 * <p>Null fields are written with {@code writeNull()}; non-null fields are written according
 * to their entry in {@code fieldVectorColumnTypes} (BYTES, LONG, DOUBLE or DECIMAL).
 *
 * @param batch the batch to evaluate; nothing happens when its size is 0
 * @throws HiveException declared by the overridden method; note that any exception raised
 *         while serializing or delegating is rethrown wrapped in a {@link RuntimeException}
 */
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
  final int rowCount = batch.size;
  if (rowCount == 0) {
    return;
  }

  // Create the reusable output buffer and serializer on first use.
  if (buffer == null) {
    buffer = new Output();
    binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
  }

  // Evaluate the field expressions so the struct's field columns are populated.
  for (VectorExpression fieldExpression : structExpressions) {
    fieldExpression.evaluate(batch);
  }

  BytesColumnVector keyColumn = (BytesColumnVector) batch.cols[scratchBytesColumn];
  try {
    final boolean useSelection = batch.selectedInUse;
    final int[] selection = batch.selected;
    final int fieldCount = structColumnMap.length;

    for (int position = 0; position < rowCount; position++) {
      final int row = useSelection ? selection[position] : position;

      // Start a fresh key for this row.
      binarySortableSerializeWrite.set(buffer);

      for (int field = 0; field < fieldCount; field++) {
        final ColumnVector source = batch.cols[structColumnMap[field]];
        // A repeating column keeps its single value in slot 0.
        final int sourceRow = source.isRepeating ? 0 : row;

        if (!source.noNulls && source.isNull[sourceRow]) {
          binarySortableSerializeWrite.writeNull();
          continue;
        }

        switch (fieldVectorColumnTypes[field]) {
          case BYTES: {
            BytesColumnVector bytesSource = (BytesColumnVector) source;
            binarySortableSerializeWrite.writeString(
                bytesSource.vector[sourceRow],
                bytesSource.start[sourceRow],
                bytesSource.length[sourceRow]);
            break;
          }
          case LONG: {
            LongColumnVector longSource = (LongColumnVector) source;
            binarySortableSerializeWrite.writeLong(longSource.vector[sourceRow]);
            break;
          }
          case DOUBLE: {
            DoubleColumnVector doubleSource = (DoubleColumnVector) source;
            binarySortableSerializeWrite.writeDouble(doubleSource.vector[sourceRow]);
            break;
          }
          case DECIMAL: {
            DecimalColumnVector decimalSource = (DecimalColumnVector) source;
            binarySortableSerializeWrite.writeHiveDecimal(decimalSource.vector[sourceRow], decimalSource.scale);
            break;
          }
          default:
            throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[field].name());
        }
      }

      // Store the finished key for this row in the scratch column.
      keyColumn.setVal(row, buffer.getData(), 0, buffer.getLength());
    }

    // With every key written to the scratch column, let the superclass look them up
    // in the IN list.
    super.evaluate(batch);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.
the class VectorUDFMapIndexDecimalCol method findInMap.
/**
 * Looks for the decimal index value among the keys of one map entry.
 *
 * @param indexColumnVector decimal column holding the value to look for
 * @param indexBatchIndex position of that value inside {@code indexColumnVector}
 * @param mapColumnVector map column whose keys are searched
 * @param mapBatchIndex position of the map entry inside {@code mapColumnVector}
 * @return the position in the map's key vector of the first key that compares equal to the
 *         index value, or {@code -1} when no key matches
 */
@Override
public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex, MapColumnVector mapColumnVector, int mapBatchIndex) {
  // The keys of this map entry occupy [firstKey, firstKey + keyCount) in the key vector.
  final int firstKey = (int) mapColumnVector.offsets[mapBatchIndex];
  final int keyCount = (int) mapColumnVector.lengths[mapBatchIndex];
  final DecimalColumnVector keyColumn = (DecimalColumnVector) mapColumnVector.keys;
  final HiveDecimalWritable[] mapKeys = keyColumn.vector;
  final DecimalColumnVector indexColumn = (DecimalColumnVector) indexColumnVector;
  final HiveDecimalWritable wanted = indexColumn.vector[indexBatchIndex];

  for (int step = 0, position = firstKey; step < keyCount; step++, position++) {
    if (wanted.compareTo(mapKeys[position]) == 0) {
      return position;
    }
  }
  return -1;
}
use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.
the class VectorUDAFAvgDecimal method aggregateInputSelection.
/**
 * Evaluates the input expression on the batch and hands its decimal output column to the
 * {@code iterate...WithAggregationSelection} helper that matches the column's
 * {@code noNulls} / {@code isRepeating} flags and the batch's {@code selectedInUse} flag.
 *
 * @param aggregationBufferSets aggregation buffer rows passed through to the helper
 * @param bufferIndex buffer index passed through to the helper
 * @param batch the batch to aggregate; nothing happens when its size is 0
 * @throws HiveException if evaluating the input expression fails
 */
@Override
public void aggregateInputSelection(VectorAggregationBufferRow[] aggregationBufferSets, int bufferIndex, VectorizedRowBatch batch) throws HiveException {
  final int rowCount = batch.size;
  if (rowCount == 0) {
    return;
  }

  inputExpression.evaluate(batch);

  final DecimalColumnVector decimals = (DecimalColumnVector) batch.cols[this.inputExpression.getOutputColumn()];
  final HiveDecimalWritable[] values = decimals.vector;

  if (decimals.noNulls) {
    if (decimals.isRepeating) {
      // One value stands for every row; the selection vector is not consulted here.
      iterateNoNullsRepeatingWithAggregationSelection(aggregationBufferSets, bufferIndex, values[0], rowCount);
      return;
    }
    if (batch.selectedInUse) {
      iterateNoNullsSelectionWithAggregationSelection(aggregationBufferSets, bufferIndex, values, batch.selected, rowCount);
      return;
    }
    iterateNoNullsWithAggregationSelection(aggregationBufferSets, bufferIndex, values, rowCount);
    return;
  }

  // From here on the column may contain nulls, so isNull is passed along.
  if (decimals.isRepeating) {
    if (batch.selectedInUse) {
      iterateHasNullsRepeatingSelectionWithAggregationSelection(aggregationBufferSets, bufferIndex, values[0], rowCount, batch.selected, decimals.isNull);
      return;
    }
    iterateHasNullsRepeatingWithAggregationSelection(aggregationBufferSets, bufferIndex, values[0], rowCount, decimals.isNull);
    return;
  }

  if (batch.selectedInUse) {
    iterateHasNullsSelectionWithAggregationSelection(aggregationBufferSets, bufferIndex, values, rowCount, batch.selected, decimals.isNull);
    return;
  }
  iterateHasNullsWithAggregationSelection(aggregationBufferSets, bufferIndex, values, rowCount, decimals.isNull);
}
Aggregations