Search in sources :

Example 91 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project.

From the class FuncRandNoSeed, the method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    // Evaluate child expressions first so their outputs are available to this one.
    if (childExpressions != null) {
        evaluateChildren(batch);
    }

    final DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
    final double[] outputVector = outputColVector.vector;
    final boolean[] outputIsNull = outputColVector.isNull;
    final int[] sel = batch.selected;
    final int n = batch.size;

    // Every row gets its own random value, so the output is never repeating.
    outputColVector.isRepeating = false;

    // Nothing to do for an empty batch.
    if (n == 0) {
        return;
    }

    if (batch.selectedInUse) {
        if (outputColVector.noNulls) {
            for (int j = 0; j < n; j++) {
                outputVector[sel[j]] = random.nextDouble();
            }
        } else {
            for (int j = 0; j < n; j++) {
                final int i = sel[j];
                // Clear isNull before the call in case it changes its mind.
                outputIsNull[i] = false;
                outputVector[i] = random.nextDouble();
            }
        }
    } else {
        if (!outputColVector.noNulls) {
            // Assume it is almost always a performance win to fill all of isNull so we can
            // safely reset noNulls.
            Arrays.fill(outputIsNull, false);
            outputColVector.noNulls = true;
        }
        for (int i = 0; i < n; i++) {
            outputVector[i] = random.nextDouble();
        }
    }
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 92 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project.

From the class TestVectorFilterExpressions, the method testFilterDoubleIn.

@Test
public void testFilterDoubleIn() throws HiveException {
    // Build a 5-row batch whose first column we populate explicitly.
    final int seed = 17;
    VectorizedRowBatch batch = VectorizedRowGroupGenUtil.getVectorizedRowBatch(5, 2, seed);
    DoubleColumnVector col = new DoubleColumnVector();
    batch.cols[0] = col;

    // IN-list filter on column 0 with two matching values.
    FilterDoubleColumnInList filter = new FilterDoubleColumnInList(0);
    filter.setInListValues(new double[] { 5.0, 20.2 });
    filter.setInputTypeInfos(new TypeInfo[] { TypeInfoFactory.doubleTypeInfo });
    filter.transientInit();
    VectorExpression expr1 = filter;

    // Basic sanity check only; the remaining cases mirror the Long variant.
    double[] values = { 5.0, 20.2, 17.0, 15.0, 10.0 };
    for (int i = 0; i < values.length; i++) {
        col.vector[i] = values[i];
    }

    expr1.evaluate(batch);

    // Only rows 0 and 1 hold values present in the IN list.
    assertEquals(2, batch.size);
    assertTrue(batch.selectedInUse);
    assertEquals(0, batch.selected[0]);
    assertEquals(1, batch.selected[1]);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Test(org.junit.Test)

Example 93 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project.

From the class TestVectorFilterExpressions, the method testFilterDoubleBetween.

@Test
public void testFilterDoubleBetween() {
    // Spot check only. null & repeating behavior are checked elsewhere for the same template.
    final int seed = 17;
    VectorizedRowBatch batch = VectorizedRowGroupGenUtil.getVectorizedRowBatch(5, 2, seed);
    DoubleColumnVector col = new DoubleColumnVector();
    batch.cols[0] = col;

    // Basic case: fill column 0 with known values.
    double[] values = { 5, 20, 17, 15, 10 };
    for (int i = 0; i < values.length; i++) {
        col.vector[i] = values[i];
    }

    // BETWEEN 20 AND 21 should select only row 1 (value 20).
    VectorExpression expr = new FilterDoubleColumnBetween(0, 20, 21);
    expr.evaluate(batch);

    assertEquals(1, batch.size);
    assertTrue(batch.selectedInUse);
    assertEquals(1, batch.selected[0]);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) FilterDoubleColumnBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnBetween) Test(org.junit.Test)

Example 94 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project.

From the class TestInputOutputFormat, the method testVectorizationWithAcid.

// test acid with vectorization, no combine
@Test
public void testVectorizationWithAcid() throws Exception {
    StructObjectInspector inspector = new BigRowInspector();
    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "vectorizationAcid", inspector, true, 1);
    // write the orc file to the mock file system
    Path partDir = new Path(conf.get("mapred.input.dir"));
    OrcRecordUpdater writer = new OrcRecordUpdater(partDir, new AcidOutputFormat.Options(conf).maximumWriteId(10).writingBase(true).bucket(0).inspector(inspector).finalDestination(partDir));
    for (int i = 0; i < 100; ++i) {
        BigRow row = new BigRow(i);
        writer.insert(10, row);
    }
    writer.close(false);
    Path path = new Path("mock:/vectorizationAcid/p=0/base_0000010/bucket_00000");
    setBlocks(path, conf, new MockBlock("host0", "host1"));
    // call getsplits
    HiveInputFormat<?, ?> inputFormat = new HiveInputFormat<WritableComparable, Writable>();
    InputSplit[] splits = inputFormat.getSplits(conf, 10);
    assertEquals(1, splits.length);
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, BigRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, BigRow.getColumnTypesProperty());
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(100, value.count());
    LongColumnVector booleanColumn = (LongColumnVector) value.cols[0];
    LongColumnVector byteColumn = (LongColumnVector) value.cols[1];
    LongColumnVector shortColumn = (LongColumnVector) value.cols[2];
    LongColumnVector intColumn = (LongColumnVector) value.cols[3];
    LongColumnVector longColumn = (LongColumnVector) value.cols[4];
    DoubleColumnVector floatColumn = (DoubleColumnVector) value.cols[5];
    DoubleColumnVector doubleCoulmn = (DoubleColumnVector) value.cols[6];
    BytesColumnVector stringColumn = (BytesColumnVector) value.cols[7];
    DecimalColumnVector decimalColumn = (DecimalColumnVector) value.cols[8];
    LongColumnVector dateColumn = (LongColumnVector) value.cols[9];
    TimestampColumnVector timestampColumn = (TimestampColumnVector) value.cols[10];
    for (int i = 0; i < 100; i++) {
        assertEquals("checking boolean " + i, i % 2 == 0 ? 1 : 0, booleanColumn.vector[i]);
        assertEquals("checking byte " + i, (byte) i, byteColumn.vector[i]);
        assertEquals("checking short " + i, (short) i, shortColumn.vector[i]);
        assertEquals("checking int " + i, i, intColumn.vector[i]);
        assertEquals("checking long " + i, i, longColumn.vector[i]);
        assertEquals("checking float " + i, i, floatColumn.vector[i], 0.0001);
        assertEquals("checking double " + i, i, doubleCoulmn.vector[i], 0.0001);
        Text strValue = new Text();
        strValue.set(stringColumn.vector[i], stringColumn.start[i], stringColumn.length[i]);
        assertEquals("checking string " + i, new Text(Long.toHexString(i)), strValue);
        assertEquals("checking decimal " + i, HiveDecimal.create(i), decimalColumn.vector[i].getHiveDecimal());
        assertEquals("checking date " + i, i, dateColumn.vector[i]);
        long millis = (long) i * MILLIS_IN_DAY;
        millis -= LOCAL_TIMEZONE.getOffset(millis);
        assertEquals("checking timestamp " + i, millis, timestampColumn.getTime(i));
    }
    assertEquals(false, reader.next(key, value));
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) CombineHiveInputFormat(org.apache.hadoop.hive.ql.io.CombineHiveInputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 95 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project.

From the class VectorizedColumnReaderTestBase, the method floatReadLong.

protected void floatReadLong(boolean isDictionaryEncoding) throws Exception {
    // Read a Parquet int64 column back as a Hive float; values arrive in a DoubleColumnVector.
    Configuration conf = new Configuration();
    conf.set(IOConstants.COLUMNS, "int64_field");
    conf.set(IOConstants.COLUMNS_TYPES, "float");
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
    VectorizedParquetRecordReader reader = createTestParquetReader("message test { required int64 int64_field;}", conf);
    VectorizedRowBatch batch = reader.createValue();
    try {
        int count = 0;
        while (reader.next(NullWritable.get(), batch)) {
            DoubleColumnVector column = (DoubleColumnVector) batch.cols[0];
            assertTrue(column.noNulls);
            // Stop consuming the last batch once all expected elements were checked.
            for (int i = 0; i < column.vector.length && count < nElements; i++) {
                assertEquals("Failed at " + count, getLongValue(isDictionaryEncoding, count), column.vector[i], 0);
                assertFalse(column.isNull[i]);
                count++;
            }
        }
        assertEquals(nElements, count);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Configuration(org.apache.hadoop.conf.Configuration) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)

Aggregations

DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)104 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)59 Test (org.junit.Test)37 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)33 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)18 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)17 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)13 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)10 VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)9 Configuration (org.apache.hadoop.conf.Configuration)7 Random (java.util.Random)5 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)5 Timestamp (java.sql.Timestamp)4 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)4 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)4 StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector)3 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)2 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)2 ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2