Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
The class FuncTimestampToLong, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn];
  int[] sel = batch.selected;
  int n = batch.size;
  LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
  boolean[] inputIsNull = inputColVector.isNull;
  boolean[] outputIsNull = outputColVector.isNull;
  if (n == 0) {
    // Nothing to do
    return;
  }
  // We do not need to do a column reset since we are carefully changing the output.
  outputColVector.isRepeating = false;
  if (inputColVector.isRepeating) {
    if (inputColVector.noNulls || !inputIsNull[0]) {
      // Set isNull before the call in case it changes its mind.
      outputIsNull[0] = false;
      func(outputColVector, inputColVector, 0);
    } else {
      outputIsNull[0] = true;
      outputColVector.noNulls = false;
    }
    outputColVector.isRepeating = true;
    return;
  }
  if (inputColVector.noNulls) {
    if (batch.selectedInUse) {
      if (!outputColVector.noNulls) {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          // Set isNull before the call in case it changes its mind.
          outputIsNull[i] = false;
          func(outputColVector, inputColVector, i);
        }
      } else {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          func(outputColVector, inputColVector, i);
        }
      }
    } else {
      if (!outputColVector.noNulls) {
        // Assume it is almost always a performance win to fill all of isNull so we can
        // safely reset noNulls.
        Arrays.fill(outputIsNull, false);
        outputColVector.noNulls = true;
      }
      for (int i = 0; i != n; i++) {
        func(outputColVector, inputColVector, i);
      }
    }
  } else /* there are nulls in the inputColVector */ {
    // Carefully handle NULLs...
    outputColVector.noNulls = false;
    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outputColVector.isNull[i] = inputColVector.isNull[i];
        if (!inputColVector.isNull[i]) {
          func(outputColVector, inputColVector, i);
        }
      }
    } else {
      System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
      for (int i = 0; i != n; i++) {
        if (!inputColVector.isNull[i]) {
          func(outputColVector, inputColVector, i);
        }
      }
    }
  }
}
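The batch walk above never touches the timestamp values itself; all per-row work is delegated to func(outputColVector, inputColVector, i), which is not shown on this page. The following is only a minimal sketch of what such a per-row step could look like. The class name TimestampToLongSketch is hypothetical, and the real FuncTimestampToLong may produce seconds rather than the epoch milliseconds that getTime(i) returns.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class TimestampToLongSketch {

  // Hypothetical per-row conversion, not the real FuncTimestampToLong.func():
  // it only illustrates reading one row from a TimestampColumnVector and
  // writing it into a LongColumnVector.
  protected void func(LongColumnVector outputColVector, TimestampColumnVector inputColVector, int i) {
    // getTime(i) returns the row's epoch milliseconds; the actual cast may
    // divide by 1000 to produce seconds, depending on the Hive version.
    outputColVector.vector[i] = inputColVector.getTime(i);
  }
}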
Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
The class IfExprTimestampScalarScalarBase, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
  TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
  int[] sel = batch.selected;
  boolean[] outputIsNull = outputColVector.isNull;
  // We do not need to do a column reset since we are carefully changing the output.
  outputColVector.isRepeating = false;
  int n = batch.size;
  long[] vector1 = arg1ColVector.vector;
  // Return immediately if the batch is empty.
  if (n == 0) {
    return;
  }
  if (arg1ColVector.isRepeating) {
    if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
      outputColVector.fill(arg2Scalar);
    } else {
      outputColVector.fill(arg3Scalar);
    }
    return;
  }
  if (arg1ColVector.noNulls) {
    if (batch.selectedInUse) {
      if (!outputColVector.noNulls) {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          // Set isNull before the call in case it changes its mind.
          outputIsNull[i] = false;
          outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
        }
      } else {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
        }
      }
    } else {
      if (!outputColVector.noNulls) {
        // Assume it is almost always a performance win to fill all of isNull so we can
        // safely reset noNulls.
        Arrays.fill(outputIsNull, false);
        outputColVector.noNulls = true;
      }
      for (int i = 0; i != n; i++) {
        outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
      }
    }
  } else /* there are nulls */ {
    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outputIsNull[i] = false;
        outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2Scalar : arg3Scalar);
      }
    } else {
      Arrays.fill(outputIsNull, 0, n, false);
      for (int i = 0; i != n; i++) {
        outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2Scalar : arg3Scalar);
      }
    }
  }
}
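IfExprTimestampScalarScalarBase leaves arg2Scalar and arg3Scalar to its subclasses; the evaluate() above only assumes they can be handed to TimestampColumnVector.fill(...) and set(...). The driver below is a hypothetical, self-contained sketch (not part of the Hive sources) that mirrors the per-row branch and the repeating fast path by hand, assuming the scalars are java.sql.Timestamp values.

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class IfExprScalarScalarSketch {
  public static void main(String[] args) {
    int size = 4;
    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
    LongColumnVector condition = new LongColumnVector(size);
    TimestampColumnVector output = new TimestampColumnVector(size);
    batch.cols[0] = condition;
    batch.cols[1] = output;
    batch.size = size;

    Timestamp whenTrue = Timestamp.valueOf("2001-01-01 01:02:03");
    Timestamp whenFalse = Timestamp.valueOf("1999-12-31 23:59:59");

    // Non-repeating case: mirror the per-row branch of evaluate() above.
    for (int i = 0; i < size; i++) {
      condition.vector[i] = (i % 2 == 0) ? 1 : 0;
      output.isNull[i] = false;
      output.set(i, condition.vector[i] == 1 ? whenTrue : whenFalse);
    }

    // Repeating case: one fill() covers every row, as in the fast path.
    condition.isRepeating = true;
    condition.vector[0] = 1;
    output.fill(whenTrue);
  }
}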
Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
The class FuncDecimalToTimestamp, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
  int[] sel = batch.selected;
  int n = batch.size;
  TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
  boolean[] inputIsNull = inputColVector.isNull;
  boolean[] outputIsNull = outputColVector.isNull;
  if (n == 0) {
    // Nothing to do
    return;
  }
  // We do not need to do a column reset since we are carefully changing the output.
  outputColVector.isRepeating = false;
  if (inputColVector.isRepeating) {
    if (inputColVector.noNulls || !inputIsNull[0]) {
      // Set isNull before the call in case it changes its mind.
      outputIsNull[0] = false;
      func(outputColVector, inputColVector, 0);
    } else {
      outputIsNull[0] = true;
      outputColVector.noNulls = false;
    }
    outputColVector.isRepeating = true;
    return;
  }
  if (inputColVector.noNulls) {
    if (batch.selectedInUse) {
      if (!outputColVector.noNulls) {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          // Set isNull before the call in case it changes its mind.
          outputIsNull[i] = false;
          func(outputColVector, inputColVector, i);
        }
      } else {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          func(outputColVector, inputColVector, i);
        }
      }
    } else {
      if (!outputColVector.noNulls) {
        // Assume it is almost always a performance win to fill all of isNull so we can
        // safely reset noNulls.
        Arrays.fill(outputIsNull, false);
        outputColVector.noNulls = true;
      }
      for (int i = 0; i != n; i++) {
        func(outputColVector, inputColVector, i);
      }
    }
  } else /* there are nulls in the inputColVector */ {
    // Carefully handle NULLs...
    outputColVector.noNulls = false;
    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outputColVector.isNull[i] = inputColVector.isNull[i];
        if (!inputColVector.isNull[i]) {
          func(outputColVector, inputColVector, i);
        }
      }
    } else {
      System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
      for (int i = 0; i != n; i++) {
        if (!inputColVector.isNull[i]) {
          func(outputColVector, inputColVector, i);
        }
      }
    }
  }
}
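As with the timestamp-to-long case, the per-row decimal-to-timestamp conversion lives in func(...), which this page does not show. The sketch below is a rough stand-in: the class name and the seconds-since-the-epoch interpretation are assumptions, and the real conversion likely preserves nanosecond precision via a dedicated helper rather than going through a double.

import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class DecimalToTimestampSketch {

  // Hypothetical per-row conversion, not the real FuncDecimalToTimestamp.func():
  // it treats the decimal as fractional seconds since the epoch purely to
  // illustrate writing one row into a TimestampColumnVector.
  protected void func(TimestampColumnVector outputColVector, DecimalColumnVector inputColVector, int i) {
    HiveDecimal seconds = inputColVector.vector[i].getHiveDecimal();
    long millis = (long) (seconds.doubleValue() * 1000d);
    outputColVector.set(i, new Timestamp(millis));
  }
}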
Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
The class TestVectorGenericDateExpressions, method testDateDiffColCol.
@Test
public void testDateDiffColCol() throws HiveException {
  for (PrimitiveCategory colType1 : dateTimestampStringTypes) {
    for (PrimitiveCategory colType2 : dateTimestampStringTypes) {
      LongColumnVector date1 = newRandomLongColumnVector(10000, size);
      LongColumnVector date2 = newRandomLongColumnVector(10000, size);
      LongColumnVector output = new LongColumnVector(size);
      VectorizedRowBatch batch = new VectorizedRowBatch(3, size);
      batch.cols[0] = castTo(date1, colType1);
      batch.cols[1] = castTo(date2, colType2);
      batch.cols[2] = output;
      validateDateDiff(batch, date1, date2, colType1, colType2);
      TestVectorizedRowBatch.addRandomNulls(date1);
      batch.cols[0] = castTo(date1, colType1);
      validateDateDiff(batch, date1, date2, colType1, colType2);
      TestVectorizedRowBatch.addRandomNulls(date2);
      batch.cols[1] = castTo(date2, colType2);
      validateDateDiff(batch, date1, date2, colType1, colType2);
    }
  }

  VectorExpression udf = new VectorUDFDateDiffColCol(0, 1, 2);
  VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
  BytesColumnVector bcv;
  byte[] bytes = "error".getBytes(utf8);

  udf.setInputTypeInfos(new TypeInfo[] { TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo });
  udf.transientInit();
  batch.cols[0] = new BytesColumnVector(1);
  batch.cols[1] = new TimestampColumnVector(1);
  batch.cols[2] = new LongColumnVector(1);
  bcv = (BytesColumnVector) batch.cols[0];
  bcv.vector[0] = bytes;
  bcv.start[0] = 0;
  bcv.length[0] = bytes.length;
  udf.evaluate(batch);
  Assert.assertEquals(batch.cols[2].isNull[0], true);

  udf.setInputTypeInfos(new TypeInfo[] { TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.stringTypeInfo });
  udf.transientInit();
  batch.cols[0] = new TimestampColumnVector(1);
  batch.cols[1] = new BytesColumnVector(1);
  batch.cols[2] = new LongColumnVector(1);
  bcv = (BytesColumnVector) batch.cols[1];
  bcv.vector[0] = bytes;
  bcv.start[0] = 0;
  bcv.length[0] = bytes.length;
  udf.evaluate(batch);
  Assert.assertEquals(batch.cols[2].isNull[0], true);
}
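The happy path of this test runs through helpers (newRandomLongColumnVector, castTo, validateDateDiff) that are not reproduced here; only the malformed-string path, which is expected to null out the result, is shown explicitly. For orientation, below is a hypothetical, self-contained sketch of driving VectorUDFDateDiffColCol on two well-formed DATE columns, assuming dates are encoded as epoch days in LongColumnVectors (as the test's castTo usage suggests); whether transientInit() is needed for the date/date combination is an assumption carried over from the pattern above.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateDiffColCol;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DateDiffColColSketch {
  public static void main(String[] args) throws Exception {
    // One-row batch: cols 0 and 1 hold dates as epoch days, col 2 receives the diff.
    VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
    LongColumnVector d1 = new LongColumnVector(1);
    LongColumnVector d2 = new LongColumnVector(1);
    batch.cols[0] = d1;
    batch.cols[1] = d2;
    batch.cols[2] = new LongColumnVector(1);
    batch.size = 1;
    d1.vector[0] = 10;  // 1970-01-11
    d2.vector[0] = 3;   // 1970-01-04

    VectorExpression udf = new VectorUDFDateDiffColCol(0, 1, 2);
    udf.setInputTypeInfos(new TypeInfo[] { TypeInfoFactory.dateTypeInfo, TypeInfoFactory.dateTypeInfo });
    udf.transientInit();
    udf.evaluate(batch);

    // datediff(col0, col1) should yield 10 - 3 = 7 days.
    System.out.println(((LongColumnVector) batch.cols[2]).vector[0]);
  }
}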
Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.
The class TestInputOutputFormat, method testVectorizationWithAcid.
// test acid with vectorization, no combine
@Test
public void testVectorizationWithAcid() throws Exception {
  StructObjectInspector inspector = new BigRowInspector();
  JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "vectorizationAcid", inspector, true, 1);

  // write the orc file to the mock file system
  Path partDir = new Path(conf.get("mapred.input.dir"));
  OrcRecordUpdater writer = new OrcRecordUpdater(partDir,
      new AcidOutputFormat.Options(conf).maximumWriteId(10).writingBase(true).bucket(0).inspector(inspector).finalDestination(partDir));
  for (int i = 0; i < 100; ++i) {
    BigRow row = new BigRow(i);
    writer.insert(10, row);
  }
  writer.close(false);
  Path path = new Path("mock:/vectorizationAcid/p=0/base_0000010/bucket_00000");
  setBlocks(path, conf, new MockBlock("host0", "host1"));

  // call getSplits
  HiveInputFormat<?, ?> inputFormat = new HiveInputFormat<WritableComparable, Writable>();
  InputSplit[] splits = inputFormat.getSplits(conf, 10);
  assertEquals(1, splits.length);

  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, BigRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, BigRow.getColumnTypesProperty());
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);

  org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> reader =
      inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
  NullWritable key = reader.createKey();
  VectorizedRowBatch value = reader.createValue();
  assertEquals(true, reader.next(key, value));
  assertEquals(100, value.count());

  LongColumnVector booleanColumn = (LongColumnVector) value.cols[0];
  LongColumnVector byteColumn = (LongColumnVector) value.cols[1];
  LongColumnVector shortColumn = (LongColumnVector) value.cols[2];
  LongColumnVector intColumn = (LongColumnVector) value.cols[3];
  LongColumnVector longColumn = (LongColumnVector) value.cols[4];
  DoubleColumnVector floatColumn = (DoubleColumnVector) value.cols[5];
  DoubleColumnVector doubleColumn = (DoubleColumnVector) value.cols[6];
  BytesColumnVector stringColumn = (BytesColumnVector) value.cols[7];
  DecimalColumnVector decimalColumn = (DecimalColumnVector) value.cols[8];
  LongColumnVector dateColumn = (LongColumnVector) value.cols[9];
  TimestampColumnVector timestampColumn = (TimestampColumnVector) value.cols[10];

  for (int i = 0; i < 100; i++) {
    assertEquals("checking boolean " + i, i % 2 == 0 ? 1 : 0, booleanColumn.vector[i]);
    assertEquals("checking byte " + i, (byte) i, byteColumn.vector[i]);
    assertEquals("checking short " + i, (short) i, shortColumn.vector[i]);
    assertEquals("checking int " + i, i, intColumn.vector[i]);
    assertEquals("checking long " + i, i, longColumn.vector[i]);
    assertEquals("checking float " + i, i, floatColumn.vector[i], 0.0001);
    assertEquals("checking double " + i, i, doubleColumn.vector[i], 0.0001);
    Text strValue = new Text();
    strValue.set(stringColumn.vector[i], stringColumn.start[i], stringColumn.length[i]);
    assertEquals("checking string " + i, new Text(Long.toHexString(i)), strValue);
    assertEquals("checking decimal " + i, HiveDecimal.create(i), decimalColumn.vector[i].getHiveDecimal());
    assertEquals("checking date " + i, i, dateColumn.vector[i]);
    long millis = (long) i * MILLIS_IN_DAY;
    millis -= LOCAL_TIMEZONE.getOffset(millis);
    assertEquals("checking timestamp " + i, millis, timestampColumn.getTime(i));
  }
  assertEquals(false, reader.next(key, value));
}
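The final loop checks only the millisecond part of each timestamp via timestampColumn.getTime(i). If a full java.sql.Timestamp were needed, it could be rebuilt from one row of a TimestampColumnVector; the helper below is a small sketch, assuming getTime(i) holds epoch milliseconds and getNanos(i) the nanosecond-of-second field.

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class TimestampReadbackSketch {

  // Rebuild a java.sql.Timestamp from one row of a TimestampColumnVector.
  // setNanos must come after the millisecond-based constructor so the
  // sub-second part is not lost.
  static Timestamp readRow(TimestampColumnVector col, int row) {
    Timestamp ts = new Timestamp(col.getTime(row));
    ts.setNanos(col.getNanos(row));
    return ts;
  }
}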