Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
From the class TestVectorTypeCasts, method testCastTimestampToDouble.
@Test
public void testCastTimestampToDouble() {
  double[] doubleValues = new double[500];
  VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchTimestampInDoubleOut(doubleValues);
  TimestampColumnVector inV = (TimestampColumnVector) b.cols[0];
  DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1];
  b.cols[0].noNulls = true;
  VectorExpression expr = new CastTimestampToDouble(0, 1);
  expr.evaluate(b);
  // Each vectorized result should match the scalar timestamp-to-double conversion.
  for (int i = 0; i < doubleValues.length; i++) {
    double actual = resultV.vector[i];
    double doubleValue = TimestampUtils.getDouble(inV.asScratchTimestamp(i));
    assertEquals(actual, doubleValue, 0.000000001F);
  }
}
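The TestVectorMathFunctions helper that builds the batch is not shown above. A minimal sketch of an equivalent setup, assuming a two-column batch with a populated TimestampColumnVector input and an empty DoubleColumnVector output (the method name and sample values below are illustrative, not the actual helper):

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Hypothetical stand-in for getVectorizedRowBatchTimestampInDoubleOut
static VectorizedRowBatch makeTimestampInDoubleOutBatch(int size) {
  VectorizedRowBatch b = new VectorizedRowBatch(2, size);
  TimestampColumnVector inV = new TimestampColumnVector(size);
  DoubleColumnVector outV = new DoubleColumnVector(size);
  for (int i = 0; i < size; i++) {
    // arbitrary sample values: i seconds after the epoch
    inV.set(i, new Timestamp(i * 1000L));
  }
  b.cols[0] = inV;
  b.cols[1] = outV;
  b.size = size;
  return b;
}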
Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
From the class TestVectorizedORCReader, method checkVectorizedReader.
private void checkVectorizedReader() throws Exception {
  Reader vreader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
  Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
  RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
  RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
  VectorizedRowBatch batch = reader.getSchema().createRowBatch();
  OrcStruct row = null;
  // Check the vectorized ORC reader against the row-by-row ORC reader
  while (vrr.nextBatch(batch)) {
    for (int i = 0; i < batch.size; i++) {
      row = (OrcStruct) rr.next(row);
      for (int j = 0; j < batch.cols.length; j++) {
        Object a = (row.getFieldValue(j));
        ColumnVector cv = batch.cols[j];
        // if the value is repeating, use row 0
        int rowId = cv.isRepeating ? 0 : i;
        // make sure the null flag agrees
        if (a == null) {
          Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
        } else if (a instanceof BooleanWritable) {
          // Boolean values are stored as 1's and 0's, so convert and compare
          Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
          long b = ((LongColumnVector) cv).vector[rowId];
          Assert.assertEquals(temp.toString(), Long.toString(b));
        } else if (a instanceof TimestampWritable) {
          // Timestamps are stored in a TimestampColumnVector, so compare the Timestamp values directly
          TimestampWritable t = ((TimestampWritable) a);
          TimestampColumnVector tcv = ((TimestampColumnVector) cv);
          Assert.assertEquals(t.getTimestamp(), tcv.asScratchTimestamp(rowId));
        } else if (a instanceof DateWritable) {
          // Dates are stored as a day count, so convert to millis and compare
          DateWritable adt = (DateWritable) a;
          long b = ((LongColumnVector) cv).vector[rowId];
          Assert.assertEquals(adt.get().getTime(), DateWritable.daysToMillis((int) b));
        } else if (a instanceof HiveDecimalWritable) {
          // Decimals are stored as HiveDecimalWritable, so compare directly
          HiveDecimalWritable dec = (HiveDecimalWritable) a;
          HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[rowId];
          Assert.assertEquals(dec, b);
        } else if (a instanceof DoubleWritable) {
          double b = ((DoubleColumnVector) cv).vector[rowId];
          assertEquals(a.toString(), Double.toString(b));
        } else if (a instanceof Text) {
          BytesColumnVector bcv = (BytesColumnVector) cv;
          Text b = new Text();
          b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
          assertEquals(a, b);
        } else if (a instanceof IntWritable || a instanceof LongWritable
            || a instanceof ByteWritable || a instanceof ShortWritable) {
          // All integer types are stored in a LongColumnVector
          assertEquals(a.toString(), Long.toString(((LongColumnVector) cv).vector[rowId]));
        } else {
          // Unexpected type: fail with its class name
          assertEquals("huh", a.getClass().getName());
        }
      }
    }
    // Check isRepeating flags
    Assert.assertEquals(false, batch.cols[0].isRepeating);
    Assert.assertEquals(false, batch.cols[1].isRepeating);
    Assert.assertEquals(false, batch.cols[2].isRepeating);
    Assert.assertEquals(true, batch.cols[3].isRepeating);
    Assert.assertEquals(false, batch.cols[4].isRepeating);
    Assert.assertEquals(false, batch.cols[5].isRepeating);
    Assert.assertEquals(false, batch.cols[6].isRepeating);
    Assert.assertEquals(false, batch.cols[7].isRepeating);
    Assert.assertEquals(false, batch.cols[8].isRepeating);
    Assert.assertEquals(false, batch.cols[9].isRepeating);
    // Check noNulls flags
    Assert.assertEquals(false, batch.cols[0].noNulls);
    Assert.assertEquals(false, batch.cols[1].noNulls);
    Assert.assertEquals(true, batch.cols[2].noNulls);
    Assert.assertEquals(true, batch.cols[3].noNulls);
    Assert.assertEquals(false, batch.cols[4].noNulls);
    Assert.assertEquals(false, batch.cols[5].noNulls);
    Assert.assertEquals(false, batch.cols[6].noNulls);
    Assert.assertEquals(false, batch.cols[7].noNulls);
    Assert.assertEquals(false, batch.cols[8].noNulls);
    Assert.assertEquals(false, batch.cols[9].noNulls);
  }
  // Both readers should be exhausted at the same point
  Assert.assertEquals(false, rr.nextBatch(batch));
}
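Stripped of the per-type assertions, the vectorized read loop the test exercises reduces to roughly the sketch below (testFilePath and conf as set up by the surrounding test class; tsColIndex is a hypothetical index of a timestamp column in the schema):

Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
VectorizedRowBatch batch = reader.getSchema().createRowBatch();
while (rows.nextBatch(batch)) {
  TimestampColumnVector tcv = (TimestampColumnVector) batch.cols[tsColIndex];
  for (int r = 0; r < batch.size; r++) {
    int rowId = tcv.isRepeating ? 0 : r;
    if (tcv.noNulls || !tcv.isNull[rowId]) {
      long millis = tcv.time[rowId]; // epoch milliseconds
      int nanos = tcv.nanos[rowId];  // nanosecond-of-second component
    }
  }
}
rows.close();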
Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
From the class CastDoubleToTimestamp, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    this.evaluateChildren(batch);
  }
  DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
  TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn];
  int[] sel = batch.selected;
  boolean[] inputIsNull = inputColVector.isNull;
  boolean[] outputIsNull = outputColVector.isNull;
  outputColVector.noNulls = inputColVector.noNulls;
  int n = batch.size;
  double[] vector = inputColVector.vector;
  // return immediately if batch is empty
  if (n == 0) {
    return;
  }
  if (inputColVector.isRepeating) {
    // All must be selected otherwise size would be zero.
    // Repeating property will not change.
    setDouble(outputColVector, vector, 0);
    // Even if there are no nulls, we always copy over entry 0. Simplifies code.
    outputIsNull[0] = inputIsNull[0];
    outputColVector.isRepeating = true;
  } else if (inputColVector.noNulls) {
    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        setDouble(outputColVector, vector, i);
      }
    } else {
      for (int i = 0; i != n; i++) {
        setDouble(outputColVector, vector, i);
      }
    }
    outputColVector.isRepeating = false;
  } else /* there are nulls */ {
    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        setDouble(outputColVector, vector, i);
        outputIsNull[i] = inputIsNull[i];
      }
    } else {
      for (int i = 0; i != n; i++) {
        setDouble(outputColVector, vector, i);
      }
      System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
    }
    outputColVector.isRepeating = false;
  }
}
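The setDouble helper called above is not shown. Assuming the input double encodes seconds since the epoch with a fractional nanosecond part (the same encoding TimestampUtils.getDouble produces in the first example), a per-element conversion might look like the following sketch; it is illustrative only and ignores rounding at the one-second boundary:

// Hypothetical stand-in for CastDoubleToTimestamp.setDouble, not the actual helper.
private void setDoubleSketch(TimestampColumnVector outputColVector, double[] vector, int i) {
  double seconds = vector[i];
  long wholeSeconds = (long) Math.floor(seconds);
  int nanos = (int) Math.round((seconds - wholeSeconds) * 1_000_000_000d);
  java.sql.Timestamp ts = new java.sql.Timestamp(wholeSeconds * 1000L);
  ts.setNanos(nanos);
  outputColVector.set(i, ts);
}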
Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
From the class VectorizedPrimitiveColumnReader, method decodeDictionaryIds.
/**
 * Reads `num` values into `column`, decoding the values from `dictionaryIds` and `dictionary`.
 */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, LongColumnVector dictionaryIds) {
  System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
  if (column.noNulls) {
    column.noNulls = dictionaryIds.noNulls;
  }
  column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
  switch (descriptor.getType()) {
    case INT32:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] = dictionary.decodeToInt((int) dictionaryIds.vector[i]);
      }
      break;
    case INT64:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] = dictionary.decodeToLong((int) dictionaryIds.vector[i]);
      }
      break;
    case FLOAT:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] = dictionary.decodeToFloat((int) dictionaryIds.vector[i]);
      }
      break;
    case DOUBLE:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] = dictionary.decodeToDouble((int) dictionaryIds.vector[i]);
      }
      break;
    case INT96:
      final Calendar calendar;
      if (Strings.isNullOrEmpty(this.conversionTimeZone)) {
        // Local time should be used if no timezone is specified
        calendar = Calendar.getInstance();
      } else {
        calendar = Calendar.getInstance(TimeZone.getTimeZone(this.conversionTimeZone));
      }
      for (int i = rowId; i < rowId + num; ++i) {
        // INT96 timestamps are 12 bytes: 8 bytes of nanos-of-day followed by a 4-byte Julian day
        ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer();
        buf.order(ByteOrder.LITTLE_ENDIAN);
        long timeOfDayNanos = buf.getLong();
        int julianDay = buf.getInt();
        NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
        Timestamp ts = NanoTimeUtils.getTimestamp(nt, calendar);
        ((TimestampColumnVector) column).set(i, ts);
      }
      break;
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
      if (column instanceof BytesColumnVector) {
        for (int i = rowId; i < rowId + num; ++i) {
          ((BytesColumnVector) column).setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe());
        }
      } else {
        DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
        decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
        decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
        for (int i = rowId; i < rowId + num; ++i) {
          decimalColumnVector.vector[i].set(dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe(), decimalColumnVector.scale);
        }
      }
      break;
    default:
      throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
  }
}
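The INT96 branch relies on NanoTimeUtils to turn the Julian day and nanos-of-day into a Timestamp. Ignoring the Calendar/time-zone adjustment it applies, the underlying arithmetic is roughly the sketch below (2440588 is the Julian day number of the Unix epoch; this is not the actual NanoTimeUtils.getTimestamp implementation):

long JULIAN_DAY_OF_EPOCH = 2440588L;
long epochSeconds = (julianDay - JULIAN_DAY_OF_EPOCH) * 86400L + timeOfDayNanos / 1_000_000_000L;
int nanosOfSecond = (int) (timeOfDayNanos % 1_000_000_000L);
java.sql.Timestamp ts = new java.sql.Timestamp(epochSeconds * 1000L);
ts.setNanos(nanosOfSecond);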
Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
From the class IfExprTimestampColumnColumnBase, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
  TimestampColumnVector arg2ColVector = (TimestampColumnVector) batch.cols[arg2Column];
  TimestampColumnVector arg3ColVector = (TimestampColumnVector) batch.cols[arg3Column];
  TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn];
  int[] sel = batch.selected;
  boolean[] outputIsNull = outputColVector.isNull;
  outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
  // may override later
  outputColVector.isRepeating = false;
  int n = batch.size;
  long[] vector1 = arg1ColVector.vector;
  // return immediately if batch is empty
  if (n == 0) {
    return;
  }
  /* All the code paths below propagate nulls even if neither arg2 nor arg3
   * have nulls. This is to reduce the number of code paths and shorten the
   * code, at the expense of maybe doing unnecessary work if neither input
   * has nulls. This could be improved in the future by expanding the number
   * of code paths.
   */
  if (arg1ColVector.isRepeating) {
    if (vector1[0] == 1) {
      arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
    } else {
      arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
    }
    return;
  }
  // extend any repeating values and noNulls indicator in the inputs
  arg2ColVector.flatten(batch.selectedInUse, sel, n);
  arg3ColVector.flatten(batch.selectedInUse, sel, n);
  if (arg1ColVector.noNulls) {
    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i));
        outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
      }
    } else {
      for (int i = 0; i != n; i++) {
        outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i));
        outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
      }
    }
  } else /* there are nulls in the condition column */ {
    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i));
        outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
      }
    } else {
      for (int i = 0; i != n; i++) {
        outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i));
        outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
      }
    }
  }
  // restore repeating and no nulls indicators
  arg2ColVector.unFlatten();
  arg3ColVector.unFlatten();
}
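A usage sketch for this IF expression over timestamp columns, assuming the generated concrete subclass IfExprTimestampColumnColumn with an (arg1Column, arg2Column, arg3Column, outputColumn) constructor; the column indexes, batch size, and values below are illustrative:

// Assumed subclass and constructor; verify against the generated Hive sources.
int size = 3;
VectorizedRowBatch batch = new VectorizedRowBatch(4, size);
LongColumnVector cond = new LongColumnVector(size); // boolean condition: 1 = true, 0 = false
TimestampColumnVector thenCol = new TimestampColumnVector(size);
TimestampColumnVector elseCol = new TimestampColumnVector(size);
batch.cols[0] = cond;
batch.cols[1] = thenCol;
batch.cols[2] = elseCol;
batch.cols[3] = new TimestampColumnVector(size);
batch.size = size;
for (int i = 0; i < size; i++) {
  cond.vector[i] = i % 2;
  thenCol.set(i, new java.sql.Timestamp(1000L * i));
  elseCol.set(i, new java.sql.Timestamp(2000L * i));
}
new IfExprTimestampColumnColumn(0, 1, 2, 3).evaluate(batch);
// batch.cols[3] now holds thenCol values where cond == 1 and elseCol values elsewhere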