Search in sources:

Example 26 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

From the class TestInputOutputFormat, method testVectorizationWithAcid.

// test acid with vectorization, no combine
@Test
public void testVectorizationWithAcid() throws Exception {
    // End-to-end check: write 100 BigRow records as an ACID base file with
    // OrcRecordUpdater, then read them back through the vectorized reader and
    // verify that every column (boolean..timestamp) round-trips correctly.
    StructObjectInspector inspector = new BigRowInspector();
    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "vectorizationAcid", inspector, true, 1);
    // write the orc file to the mock file system
    Path partDir = new Path(conf.get("mapred.input.dir"));
    OrcRecordUpdater writer = new OrcRecordUpdater(partDir, new AcidOutputFormat.Options(conf).maximumTransactionId(10).writingBase(true).bucket(0).inspector(inspector).finalDestination(partDir));
    for (int i = 0; i < 100; ++i) {
        BigRow row = new BigRow(i);
        // 10 is the transaction id, matching maximumTransactionId above.
        writer.insert(10, row);
    }
    writer.close(false);
    // Path of the base file the updater just wrote; register fake block
    // locations for it so the split calculation has hosts to report.
    Path path = new Path("mock:/vectorizationAcid/p=0/base_0000010/bucket_00000");
    setBlocks(path, conf, new MockBlock("host0", "host1"));
    // call getsplits
    HiveInputFormat<?, ?> inputFormat = new HiveInputFormat<WritableComparable, Writable>();
    InputSplit[] splits = inputFormat.getSplits(conf, 10);
    // One base file on one block -> exactly one split expected.
    assertEquals(1, splits.length);
    // Schema-evolution properties and the transactional-scan flag are set
    // before opening the reader — presumably required by the vectorized ACID
    // read path; TODO confirm against HiveInputFormat.getRecordReader.
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, BigRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, BigRow.getColumnTypesProperty());
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    // All 100 rows are expected to arrive in a single batch.
    assertEquals(true, reader.next(key, value));
    assertEquals(100, value.count());
    // Column casts follow BigRow's declaration order (indices 0..10).
    LongColumnVector booleanColumn = (LongColumnVector) value.cols[0];
    LongColumnVector byteColumn = (LongColumnVector) value.cols[1];
    LongColumnVector shortColumn = (LongColumnVector) value.cols[2];
    LongColumnVector intColumn = (LongColumnVector) value.cols[3];
    LongColumnVector longColumn = (LongColumnVector) value.cols[4];
    DoubleColumnVector floatColumn = (DoubleColumnVector) value.cols[5];
    DoubleColumnVector doubleCoulmn = (DoubleColumnVector) value.cols[6];
    BytesColumnVector stringColumn = (BytesColumnVector) value.cols[7];
    DecimalColumnVector decimalColumn = (DecimalColumnVector) value.cols[8];
    LongColumnVector dateColumn = (LongColumnVector) value.cols[9];
    TimestampColumnVector timestampColumn = (TimestampColumnVector) value.cols[10];
    for (int i = 0; i < 100; i++) {
        assertEquals("checking boolean " + i, i % 2 == 0 ? 1 : 0, booleanColumn.vector[i]);
        assertEquals("checking byte " + i, (byte) i, byteColumn.vector[i]);
        assertEquals("checking short " + i, (short) i, shortColumn.vector[i]);
        assertEquals("checking int " + i, i, intColumn.vector[i]);
        assertEquals("checking long " + i, i, longColumn.vector[i]);
        assertEquals("checking float " + i, i, floatColumn.vector[i], 0.0001);
        assertEquals("checking double " + i, i, doubleCoulmn.vector[i], 0.0001);
        // BigRow writes the hex form of i into the string column.
        Text strValue = new Text();
        strValue.set(stringColumn.vector[i], stringColumn.start[i], stringColumn.length[i]);
        assertEquals("checking string " + i, new Text(Long.toHexString(i)), strValue);
        assertEquals("checking decimal " + i, HiveDecimal.create(i), decimalColumn.vector[i].getHiveDecimal());
        assertEquals("checking date " + i, i, dateColumn.vector[i]);
        // Expected epoch millis for day i, adjusted by the local zone offset
        // to match how the timestamp was written.
        long millis = (long) i * MILLIS_IN_DAY;
        millis -= LOCAL_TIMEZONE.getOffset(millis);
        assertEquals("checking timestamp " + i, millis, timestampColumn.getTime(i));
    }
    // Exactly one batch: a second next() must report end of input.
    assertEquals(false, reader.next(key, value));
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) CombineHiveInputFormat(org.apache.hadoop.hive.ql.io.CombineHiveInputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Path(org.apache.hadoop.fs.Path) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 27 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

From the class RecordReaderImpl, method nextTimestamp.

/**
 * Reads the timestamp at {@code row} from a {@link TimestampColumnVector}
 * into a {@code TimestampWritable}, reusing {@code previous} when it is
 * exactly that class; returns {@code null} when the slot holds a SQL NULL.
 */
static TimestampWritable nextTimestamp(ColumnVector vector, int row, Object previous) {
    // A repeating vector keeps its single value at index 0.
    if (vector.isRepeating) {
        row = 0;
    }
    // Guard: nulls are possible and this slot is one of them.
    if (!vector.noNulls && vector.isNull[row]) {
        return null;
    }
    // Reuse only an exact TimestampWritable (not a subclass), matching the
    // original getClass() comparison.
    TimestampWritable result =
        (previous == null || previous.getClass() != TimestampWritable.class)
            ? new TimestampWritable()
            : (TimestampWritable) previous;
    TimestampColumnVector tcv = (TimestampColumnVector) vector;
    result.setInternal(tcv.time[row], tcv.nanos[row]);
    return result;
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable)

Example 28 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

From the class TestVectorMathFunctions, method getVectorizedRowBatchTimestampInDoubleOut.

/**
 * Builds a two-column batch for timestamp-in / double-out math-function
 * tests: col 0 is the TimestampColumnVector input, col 1 the
 * DoubleColumnVector output. The supplied array is overwritten in place
 * with deterministic pseudo-random second values.
 *
 * NOTE(review): only the double column is populated here; the timestamp
 * input column is left at its defaults — confirm callers fill it before use.
 */
public static VectorizedRowBatch getVectorizedRowBatchTimestampInDoubleOut(double[] doubleValues) {
    // Fixed seed keeps the generated values reproducible across runs.
    Random rand = new Random(45993);
    int n = doubleValues.length;
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    TimestampColumnVector tsCol = new TimestampColumnVector(n);
    DoubleColumnVector dblCol = new DoubleColumnVector(n);
    for (int idx = 0; idx < n; idx++) {
        doubleValues[idx] = rand.nextDouble() % (double) SECONDS_LIMIT;
        dblCol.vector[idx] = doubleValues[idx];
    }
    batch.cols[0] = tsCol;
    batch.cols[1] = dblCol;
    batch.size = n;
    return batch;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Random(java.util.Random)

Example 29 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

From the class TestVectorMathFunctions, method getVectorizedRowBatchDoubleInTimestampOut.

/**
 * Builds a two-column batch for double-in / timestamp-out math-function
 * tests: col 0 holds seven fixed double inputs, col 1 is the (initially
 * default-valued) timestamp output column.
 */
public static VectorizedRowBatch getVectorizedRowBatchDoubleInTimestampOut() {
    // Fixed inputs covering negative, zero, fractional, and >1 values.
    final double[] inputs = { -1.5d, -0.5d, -0.1d, 0d, 0.5d, 0.7d, 1.5d };
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    DoubleColumnVector inCol = new DoubleColumnVector();
    TimestampColumnVector outCol = new TimestampColumnVector();
    for (int idx = 0; idx < inputs.length; idx++) {
        inCol.vector[idx] = inputs[idx];
    }
    batch.cols[0] = inCol;
    batch.cols[1] = outCol;
    batch.size = 7;
    return batch;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 30 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

From the class TestVectorMathFunctions, method getVectorizedRowBatchLongInTimestampOut.

/**
 * Builds a two-column batch for long-in / timestamp-out math-function tests:
 * col 0 is a LongColumnVector filled with deterministic pseudo-random second
 * values (the supplied array is overwritten in place with the same values),
 * col 1 is the timestamp output column.
 */
public static VectorizedRowBatch getVectorizedRowBatchLongInTimestampOut(long[] longValues) {
    // Fixed seed keeps the generated values reproducible across runs.
    Random rand = new Random(12099);
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    LongColumnVector inCol = new LongColumnVector();
    TimestampColumnVector outCol = new TimestampColumnVector();
    for (int idx = 0; idx < longValues.length; idx++) {
        longValues[idx] = rand.nextLong() % SECONDS_LIMIT;
        inCol.vector[idx] = longValues[idx];
    }
    batch.cols[0] = inCol;
    batch.cols[1] = outCol;
    batch.size = longValues.length;
    return batch;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) Random(java.util.Random) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Aggregations

TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)54 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)19 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)17 Timestamp (java.sql.Timestamp)13 Test (org.junit.Test)10 Random (java.util.Random)8 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)8 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)7 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)7 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)6 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)3 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)3 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)3 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)3 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)3 BooleanWritable (org.apache.hadoop.io.BooleanWritable)3 IntWritable (org.apache.hadoop.io.IntWritable)3 LongWritable (org.apache.hadoop.io.LongWritable)3 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)2 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)2