Example 61 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

The evaluate method of the class FuncTimestampToLong.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    if (n == 0) {
        // Nothing to do
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    if (inputColVector.isRepeating) {
        if (inputColVector.noNulls || !inputIsNull[0]) {
            // Set isNull before the call in case it changes its mind.
            outputIsNull[0] = false;
            func(outputColVector, inputColVector, 0);
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        return;
    }
    if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before the call in case it changes its mind.
                    outputIsNull[i] = false;
                    func(outputColVector, inputColVector, i);
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    func(outputColVector, inputColVector, i);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                func(outputColVector, inputColVector, i);
            }
        }
    } else { /* there are nulls in the inputColVector */
        // Carefully handle NULLs...
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputColVector.isNull[i] = inputColVector.isNull[i];
                if (!inputColVector.isNull[i]) {
                    func(outputColVector, inputColVector, i);
                }
            }
        } else {
            System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inputColVector.isNull[i]) {
                    func(outputColVector, inputColVector, i);
                }
            }
        }
    }
}
Also used: TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
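
FuncTimestampToLong is a template: evaluate() handles batching, the selection vector, and null bookkeeping, while subclasses supply func() for the per-element conversion. Below is a minimal driver sketch. The (inputColumn, outputColumnNum) constructor, the protected func(...) signature, and the TimestampToEpochSeconds subclass are assumptions inferred from the call sites in evaluate() above, not confirmed Hive API.

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncTimestampToLong;

// Hypothetical subclass: maps each timestamp to epoch seconds.
public class TimestampToEpochSeconds extends FuncTimestampToLong {

    public TimestampToEpochSeconds(int inputColumn, int outputColumnNum) {
        // Assumed two-argument constructor, matching the inputColumn and
        // outputColumnNum fields used by evaluate() above.
        super(inputColumn, outputColumnNum);
    }

    @Override
    protected void func(LongColumnVector outV, TimestampColumnVector inV, int i) {
        // getTime(i) returns milliseconds since the epoch (see Example 65).
        outV.vector[i] = inV.getTime(i) / 1000L;
    }

    public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(2, 1024);
        TimestampColumnVector in = new TimestampColumnVector(1024);
        LongColumnVector out = new LongColumnVector(1024);
        batch.cols[0] = in;   // column 0: timestamp input
        batch.cols[1] = out;  // column 1: long output
        in.set(0, Timestamp.valueOf("2024-01-01 00:00:00"));
        batch.size = 1;
        new TimestampToEpochSeconds(0, 1).evaluate(batch);
        System.out.println(out.vector[0]); // epoch seconds for row 0
    }
}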

Example 62 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

The evaluate method of the class IfExprTimestampScalarScalarBase.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
    int[] sel = batch.selected;
    boolean[] outputIsNull = outputColVector.isNull;
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    int n = batch.size;
    long[] vector1 = arg1ColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    if (arg1ColVector.isRepeating) {
        if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
            outputColVector.fill(arg2Scalar);
        } else {
            outputColVector.fill(arg3Scalar);
        }
        return;
    }
    if (arg1ColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before the call in case it changes its mind.
                    outputIsNull[i] = false;
                    outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
            }
        }
    } else { /* there are nulls */
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputIsNull[i] = false;
                outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2Scalar : arg3Scalar);
            }
        } else {
            Arrays.fill(outputIsNull, 0, n, false);
            for (int i = 0; i != n; i++) {
                outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2Scalar : arg3Scalar);
            }
        }
    }
}
Also used: TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
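
A detail worth noting in the method above: when arg1ColVector.isRepeating, a single comparison decides the whole batch, and TimestampColumnVector.fill() stamps the winning scalar across the column (fill() also marks the output as repeating, which is why that branch can return without touching isRepeating again). A standalone sketch of that fast path, with illustrative values:

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class RepeatingIfSketch {
    public static void main(String[] args) {
        // A repeating boolean column: entry 0 stands for every row.
        LongColumnVector cond = new LongColumnVector(1024);
        cond.isRepeating = true;
        cond.vector[0] = 1; // SQL TRUE

        Timestamp thenTs = Timestamp.valueOf("2024-01-01 00:00:00");
        Timestamp elseTs = Timestamp.valueOf("1970-01-01 00:00:00");

        TimestampColumnVector out = new TimestampColumnVector(1024);
        // One branch decision covers the whole batch, exactly as in the
        // isRepeating block of evaluate() above.
        out.fill((cond.noNulls || !cond.isNull[0]) && cond.vector[0] == 1
                ? thenTs : elseTs);
        System.out.println(out.getTime(0)); // millis of thenTs, for every row
    }
}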

Example 63 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

The evaluate method of the class FuncDecimalToTimestamp.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    if (n == 0) {
        // Nothing to do
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    if (inputColVector.isRepeating) {
        if (inputColVector.noNulls || !inputIsNull[0]) {
            // Set isNull before the call in case it changes its mind.
            outputIsNull[0] = false;
            func(outputColVector, inputColVector, 0);
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        return;
    }
    if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before the call in case it changes its mind.
                    outputIsNull[i] = false;
                    func(outputColVector, inputColVector, i);
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    func(outputColVector, inputColVector, i);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                func(outputColVector, inputColVector, i);
            }
        }
    } else { /* there are nulls in the inputColVector */
        // Carefully handle NULLs...
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputColVector.isNull[i] = inputColVector.isNull[i];
                if (!inputColVector.isNull[i]) {
                    func(outputColVector, inputColVector, i);
                }
            }
        } else {
            System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inputColVector.isNull[i]) {
                    func(outputColVector, inputColVector, i);
                }
            }
        }
    }
}
Also used: TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)
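
FuncDecimalToTimestamp reuses the same template; only the per-element func() differs. Below is one hypothetical func body, assuming the decimal encodes seconds since the epoch with a fractional part and assuming the same (inputColumn, outputColumnNum) constructor as before; Hive's actual CastDecimalToTimestamp applies its own conversion and rounding rules.

import java.math.BigDecimal;
import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncDecimalToTimestamp;

// Hypothetical subclass: decimal seconds-since-epoch -> Timestamp.
public class DecimalSecondsToTimestamp extends FuncDecimalToTimestamp {

    public DecimalSecondsToTimestamp(int inputColumn, int outputColumnNum) {
        super(inputColumn, outputColumnNum);
    }

    @Override
    protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i) {
        BigDecimal seconds = inV.vector[i].getHiveDecimal().bigDecimalValue();
        // Whole milliseconds since the epoch (truncation; negative values
        // would need extra care in a real implementation).
        Timestamp ts = new Timestamp(seconds.movePointRight(3).longValue());
        // Carry full sub-second precision into the nanos field.
        ts.setNanos(seconds.remainder(BigDecimal.ONE).movePointRight(9).intValue());
        outV.set(i, ts);
    }
}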

Example 64 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

The testDateDiffColCol method of the class TestVectorGenericDateExpressions.

@Test
public void testDateDiffColCol() throws HiveException {
    for (PrimitiveCategory colType1 : dateTimestampStringTypes) {
        for (PrimitiveCategory colType2 : dateTimestampStringTypes) {
            LongColumnVector date1 = newRandomLongColumnVector(10000, size);
            LongColumnVector date2 = newRandomLongColumnVector(10000, size);
            LongColumnVector output = new LongColumnVector(size);
            VectorizedRowBatch batch = new VectorizedRowBatch(3, size);
            batch.cols[0] = castTo(date1, colType1);
            batch.cols[1] = castTo(date2, colType2);
            batch.cols[2] = output;
            validateDateDiff(batch, date1, date2, colType1, colType2);
            TestVectorizedRowBatch.addRandomNulls(date1);
            batch.cols[0] = castTo(date1, colType1);
            validateDateDiff(batch, date1, date2, colType1, colType2);
            TestVectorizedRowBatch.addRandomNulls(date2);
            batch.cols[1] = castTo(date2, colType2);
            validateDateDiff(batch, date1, date2, colType1, colType2);
        }
    }
    VectorExpression udf = new VectorUDFDateDiffColCol(0, 1, 2);
    VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
    BytesColumnVector bcv;
    byte[] bytes = "error".getBytes(utf8);
    udf.setInputTypeInfos(new TypeInfo[] { TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo });
    udf.transientInit();
    batch.cols[0] = new BytesColumnVector(1);
    batch.cols[1] = new TimestampColumnVector(1);
    batch.cols[2] = new LongColumnVector(1);
    bcv = (BytesColumnVector) batch.cols[0];
    bcv.vector[0] = bytes;
    bcv.start[0] = 0;
    bcv.length[0] = bytes.length;
    udf.evaluate(batch);
    Assert.assertTrue(batch.cols[2].isNull[0]);
    udf.setInputTypeInfos(new TypeInfo[] { TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.stringTypeInfo });
    udf.transientInit();
    batch.cols[0] = new TimestampColumnVector(1);
    batch.cols[1] = new BytesColumnVector(1);
    batch.cols[2] = new LongColumnVector(1);
    bcv = (BytesColumnVector) batch.cols[1];
    bcv.vector[0] = bytes;
    bcv.start[0] = 0;
    bcv.length[0] = bytes.length;
    udf.evaluate(batch);
    Assert.assertTrue(batch.cols[2].isNull[0]);
}
Also used: VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch), TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector), Test (org.junit.Test)
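
The test above covers only the failure path: an unparseable string on either input of datediff marks the output row NULL. For contrast, here is a hedged happy-path sketch in the same style, meant to sit inside a similar @Test method; the expected value assumes datediff counts whole days between its two operands.

// Two valid timestamps one day apart, wired exactly like the test above.
VectorExpression udf = new VectorUDFDateDiffColCol(0, 1, 2);
udf.setInputTypeInfos(new TypeInfo[] {
        TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.timestampTypeInfo });
udf.transientInit();
VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
TimestampColumnVector t1 = new TimestampColumnVector(1);
TimestampColumnVector t2 = new TimestampColumnVector(1);
t1.set(0, Timestamp.valueOf("2024-01-02 00:00:00"));
t2.set(0, Timestamp.valueOf("2024-01-01 00:00:00"));
batch.cols[0] = t1;
batch.cols[1] = t2;
batch.cols[2] = new LongColumnVector(1);
batch.size = 1;
udf.evaluate(batch);
// Expect a non-NULL difference of exactly one day.
Assert.assertFalse(batch.cols[2].isNull[0]);
Assert.assertEquals(1L, ((LongColumnVector) batch.cols[2]).vector[0]);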

Example 65 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

The testVectorizationWithAcid method of the class TestInputOutputFormat.

// test acid with vectorization, no combine
@Test
public void testVectorizationWithAcid() throws Exception {
    StructObjectInspector inspector = new BigRowInspector();
    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "vectorizationAcid", inspector, true, 1);
    // write the orc file to the mock file system
    Path partDir = new Path(conf.get("mapred.input.dir"));
    OrcRecordUpdater writer = new OrcRecordUpdater(partDir, new AcidOutputFormat.Options(conf).maximumWriteId(10).writingBase(true).bucket(0).inspector(inspector).finalDestination(partDir));
    for (int i = 0; i < 100; ++i) {
        BigRow row = new BigRow(i);
        writer.insert(10, row);
    }
    writer.close(false);
    Path path = new Path("mock:/vectorizationAcid/p=0/base_0000010/bucket_00000");
    setBlocks(path, conf, new MockBlock("host0", "host1"));
    // call getsplits
    HiveInputFormat<?, ?> inputFormat = new HiveInputFormat<WritableComparable, Writable>();
    InputSplit[] splits = inputFormat.getSplits(conf, 10);
    assertEquals(1, splits.length);
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, BigRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, BigRow.getColumnTypesProperty());
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(100, value.count());
    LongColumnVector booleanColumn = (LongColumnVector) value.cols[0];
    LongColumnVector byteColumn = (LongColumnVector) value.cols[1];
    LongColumnVector shortColumn = (LongColumnVector) value.cols[2];
    LongColumnVector intColumn = (LongColumnVector) value.cols[3];
    LongColumnVector longColumn = (LongColumnVector) value.cols[4];
    DoubleColumnVector floatColumn = (DoubleColumnVector) value.cols[5];
    DoubleColumnVector doubleColumn = (DoubleColumnVector) value.cols[6];
    BytesColumnVector stringColumn = (BytesColumnVector) value.cols[7];
    DecimalColumnVector decimalColumn = (DecimalColumnVector) value.cols[8];
    LongColumnVector dateColumn = (LongColumnVector) value.cols[9];
    TimestampColumnVector timestampColumn = (TimestampColumnVector) value.cols[10];
    for (int i = 0; i < 100; i++) {
        assertEquals("checking boolean " + i, i % 2 == 0 ? 1 : 0, booleanColumn.vector[i]);
        assertEquals("checking byte " + i, (byte) i, byteColumn.vector[i]);
        assertEquals("checking short " + i, (short) i, shortColumn.vector[i]);
        assertEquals("checking int " + i, i, intColumn.vector[i]);
        assertEquals("checking long " + i, i, longColumn.vector[i]);
        assertEquals("checking float " + i, i, floatColumn.vector[i], 0.0001);
        assertEquals("checking double " + i, i, doubleCoulmn.vector[i], 0.0001);
        Text strValue = new Text();
        strValue.set(stringColumn.vector[i], stringColumn.start[i], stringColumn.length[i]);
        assertEquals("checking string " + i, new Text(Long.toHexString(i)), strValue);
        assertEquals("checking decimal " + i, HiveDecimal.create(i), decimalColumn.vector[i].getHiveDecimal());
        assertEquals("checking date " + i, i, dateColumn.vector[i]);
        long millis = (long) i * MILLIS_IN_DAY;
        millis -= LOCAL_TIMEZONE.getOffset(millis);
        assertEquals("checking timestamp " + i, millis, timestampColumn.getTime(i));
    }
    assertEquals(false, reader.next(key, value));
}
Also used: DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), CombineHiveInputFormat (org.apache.hadoop.hive.ql.io.CombineHiveInputFormat), HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat), VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector), TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), Text (org.apache.hadoop.io.Text), NullWritable (org.apache.hadoop.io.NullWritable), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
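
One line in the timestamp assertion deserves unpacking: the expected value starts from local midnight of day i and subtracts the time-zone offset to obtain the UTC-based milliseconds that TimestampColumnVector.getTime(i) reports. Isolated as a small helper (MILLIS_IN_DAY and LOCAL_TIMEZONE mirror the test's constants):

import java.util.TimeZone;

public class ExpectedTimestampSketch {
    static final long MILLIS_IN_DAY = 24L * 60 * 60 * 1000;
    static final TimeZone LOCAL_TIMEZONE = TimeZone.getDefault();

    // Expected-value arithmetic from the assertion loop above.
    static long expectedMillis(int day) {
        long millis = (long) day * MILLIS_IN_DAY; // local midnight of `day`
        // getOffset(millis) is the zone's offset from UTC (in millis) at that
        // instant; subtracting it converts local wall-clock time to UTC.
        return millis - LOCAL_TIMEZONE.getOffset(millis);
    }
}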

Aggregations

TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 66 usages
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 24 usages
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 22 usages
Timestamp (java.sql.Timestamp): 17 usages
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 11 usages
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 11 usages
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 11 usages
Test (org.junit.Test): 11 usages
Random (java.util.Random): 9 usages
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 7 usages
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 5 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 4 usages
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 3 usages
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 3 usages
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 3 usages
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 3 usages
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 3 usages
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 3 usages
IntWritable (org.apache.hadoop.io.IntWritable): 3 usages
LongWritable (org.apache.hadoop.io.LongWritable): 3 usages