Search in sources :

Example 41 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The evaluate method of the class CastStringToDouble.

/**
 * Converts the active rows of the bytes input column into the double output column.
 *
 * <p>Child expressions (if any) are evaluated first. Each non-NULL input row is passed to
 * {@code func}, which writes the output entry for that row; each NULL input row is marked
 * NULL in the output. The output vector is updated in place without a full column reset.
 *
 * @param batch the batch whose {@code inputColumn} column is read and whose
 *              {@code outputColumnNum} column is written
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    if (n == 0) {
        // Empty batch: nothing to convert.
        return;
    }
    // No column reset is performed; every output field touched is set explicitly below.
    outputColVector.isRepeating = false;
    if (inputColVector.isRepeating) {
        // A repeating input is fully described by entry 0, so only entry 0 is produced.
        if (inputColVector.noNulls || !inputIsNull[0]) {
            // Clear isNull before the call; func may set it again
            // (presumably when the bytes cannot be converted -- confirm in func).
            outputIsNull[0] = false;
            func(outputColVector, inputColVector, 0);
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        return;
    }
    if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                // The output may carry stale NULL flags: clear the flag of each selected row.
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Clear isNull before the call; func may set it again.
                    outputIsNull[i] = false;
                    func(outputColVector, inputColVector, i);
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    func(outputColVector, inputColVector, i);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Filling the whole isNull array is assumed to be cheap enough, and it lets
                // noNulls be reset safely.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                func(outputColVector, inputColVector, i);
            }
        }
    } else {
        // The input contains NULLs. Each processed row writes its own output isNull entry
        // below, so no bulk copy of the input flags is required beforehand.
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                final int i = sel[j];
                if (!inputIsNull[i]) {
                    // Clear isNull before the call; func may set it again.
                    outputIsNull[i] = false;
                    func(outputColVector, inputColVector, i);
                } else {
                    outputIsNull[i] = true;
                    outputColVector.noNulls = false;
                }
            }
        } else {
            for (int i = 0; i != n; i++) {
                if (!inputIsNull[i]) {
                    // Clear isNull before the call; func may set it again.
                    outputIsNull[i] = false;
                    func(outputColVector, inputColVector, i);
                } else {
                    outputIsNull[i] = true;
                    outputColVector.noNulls = false;
                }
            }
        }
    }
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)

Example 42 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The evaluate method of the class DoubleColumnInList.

/**
 * Fills the long output column with 1 for each active row whose double input value is
 * found in {@code inSet} and 0 otherwise; NULL input rows are marked NULL in the output.
 *
 * <p>Child expressions (if any) are evaluated first, and {@code inSet} is built from
 * {@code inListValues} the first time it is needed.
 *
 * @param batch the batch whose {@code colNum} column is read and whose
 *              {@code outputColumnNum} column is written
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    // Build the lookup set on first use.
    if (inSet == null) {
        inSet = new CuckooSetDouble(inListValues.length);
        inSet.load(inListValues);
    }
    final DoubleColumnVector source = (DoubleColumnVector) batch.cols[colNum];
    final LongColumnVector target = (LongColumnVector) batch.cols[outputColumnNum];
    final int[] selected = batch.selected;
    final boolean[] sourceNull = source.isNull;
    final boolean[] targetNull = target.isNull;
    final int rowCount = batch.size;
    final double[] values = source.vector;
    final long[] results = target.vector;

    // An empty batch needs no work.
    if (rowCount == 0) {
        return;
    }

    // No column reset is performed; every output field touched is set explicitly below.
    target.isRepeating = false;

    // Repeating input: entry 0 stands for every row, so only entry 0 is produced.
    if (source.isRepeating) {
        final boolean firstIsNull = !source.noNulls && sourceNull[0];
        if (firstIsNull) {
            targetNull[0] = true;
            target.noNulls = false;
        } else {
            targetNull[0] = false;
            results[0] = inSet.lookup(values[0]) ? 1 : 0;
        }
        target.isRepeating = true;
        return;
    }

    // Input with NULLs: mirror the NULL flags and look up only the non-NULL rows.
    if (!source.noNulls) {
        target.noNulls = false;
        if (batch.selectedInUse) {
            for (int k = 0; k != rowCount; k++) {
                final int row = selected[k];
                final boolean rowIsNull = sourceNull[row];
                targetNull[row] = rowIsNull;
                if (!rowIsNull) {
                    results[row] = inSet.lookup(values[row]) ? 1 : 0;
                }
            }
        } else {
            System.arraycopy(sourceNull, 0, targetNull, 0, rowCount);
            for (int row = 0; row != rowCount; row++) {
                if (!sourceNull[row]) {
                    results[row] = inSet.lookup(values[row]) ? 1 : 0;
                }
            }
        }
        return;
    }

    // Input without NULLs, restricted to the selected rows.
    if (batch.selectedInUse) {
        // Decided once up front: stale NULL flags only need clearing when the output
        // is not already flagged as NULL-free.
        final boolean clearFlags = !target.noNulls;
        for (int k = 0; k != rowCount; k++) {
            final int row = selected[k];
            if (clearFlags) {
                targetNull[row] = false;
            }
            results[row] = inSet.lookup(values[row]) ? 1 : 0;
        }
        return;
    }

    // Input without NULLs, all rows active.
    if (!target.noNulls) {
        // Clearing the whole flag array lets noNulls be reset safely.
        Arrays.fill(targetNull, false);
        target.noNulls = true;
    }
    for (int row = 0; row != rowCount; row++) {
        results[row] = inSet.lookup(values[row]) ? 1 : 0;
    }
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 43 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The doubleReadLong method of the class VectorizedColumnReaderTestBase.

/**
 * Reads the {@code int64_field} Parquet column through a reader configured with the Hive
 * column type {@code double} and asserts that every value matches {@code getLongValue}
 * and that no row is NULL.
 *
 * @param isDictionaryEncoding forwarded to {@code getLongValue} to pick the expected values
 * @throws Exception if creating, reading or closing the reader fails
 */
protected void doubleReadLong(boolean isDictionaryEncoding) throws Exception {
    final Configuration conf = new Configuration();
    conf.set(IOConstants.COLUMNS, "int64_field");
    conf.set(IOConstants.COLUMNS_TYPES, "double");
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");

    final VectorizedParquetRecordReader reader =
        createTestParquetReader("message test { required int64 int64_field;}", conf);
    final VectorizedRowBatch batch = reader.createValue();
    try {
        int seen = 0;
        while (reader.next(NullWritable.get(), batch)) {
            final DoubleColumnVector column = (DoubleColumnVector) batch.cols[0];
            assertTrue(column.noNulls);
            // Walk the vector, stopping as soon as all expected elements have been checked.
            for (int idx = 0; idx < column.vector.length && seen != nElements; idx++, seen++) {
                assertEquals("Failed at " + seen, getLongValue(isDictionaryEncoding, seen), column.vector[idx], 0);
                assertFalse(column.isNull[idx]);
            }
        }
        // Every expected element must have been visited.
        assertEquals(nElements, seen);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Configuration(org.apache.hadoop.conf.Configuration) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)

Example 44 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The nestedStructRead0 method of the class VectorizedColumnReaderTestBase.

/**
 * Reads the {@code nested_struct_field} column (a struct holding an inner struct of two
 * ints plus a double) and asserts each leaf value against {@code getIntValue} /
 * {@code getDoubleValue}, and that the outer struct vector is neither NULL nor repeating.
 *
 * @param isDictionaryEncoding forwarded to the expected-value helpers
 * @throws Exception if creating, reading or closing the reader fails
 */
protected void nestedStructRead0(boolean isDictionaryEncoding) throws Exception {
    final Configuration conf = new Configuration();
    conf.set(IOConstants.COLUMNS, "nested_struct_field");
    conf.set(IOConstants.COLUMNS_TYPES, "struct<nsf:struct<c:int,d:int>,e:double>");
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");

    // NOTE(review): this schema text opens three '{' but closes only two, and the outer
    // group has no repetition keyword -- confirm whether the reader really parses it.
    final String schema = "message hive_schema {\n"
        + "group nested_struct_field {\n"
        + "  optional group nsf {\n"
        + "    optional int32 c;\n"
        + "    optional int32 d;\n"
        + "  }"
        + "optional double e;\n"
        + "}\n";

    final VectorizedParquetRecordReader reader = createTestParquetReader(schema, conf);
    final VectorizedRowBatch batch = reader.createValue();
    int checked = 0;
    try {
        while (reader.next(NullWritable.get(), batch)) {
            final StructColumnVector outer = (StructColumnVector) batch.cols[0];
            final StructColumnVector inner = (StructColumnVector) outer.fields[0];
            final LongColumnVector cCol = (LongColumnVector) inner.fields[0];
            final LongColumnVector dCol = (LongColumnVector) inner.fields[1];
            final DoubleColumnVector eCol = (DoubleColumnVector) outer.fields[1];
            // Walk the vector, stopping as soon as all expected elements have been checked.
            for (int idx = 0; idx < cCol.vector.length && checked != nElements; idx++, checked++) {
                assertEquals(getIntValue(isDictionaryEncoding, checked), cCol.vector[idx]);
                assertEquals(getIntValue(isDictionaryEncoding, checked), dCol.vector[idx]);
                assertEquals(getDoubleValue(isDictionaryEncoding, checked), eCol.vector[idx], 0);
                assertFalse(outer.isNull[idx]);
                assertFalse(outer.isRepeating);
            }
        }
        assertEquals("It doesn't exit at expected position", nElements, checked);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Configuration(org.apache.hadoop.conf.Configuration) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader) StructColumnVector(org.apache.hadoop.hive.ql.exec.vector.StructColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 45 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

The floatRead method of the class VectorizedColumnReaderTestBase.

/**
 * Reads the {@code float_field} Parquet column into a double vector and asserts that
 * every value matches {@code getFloatValue} and that no row is NULL.
 *
 * @param isDictionaryEncoding forwarded to {@code getFloatValue} to pick the expected values
 * @param conf                 the configuration handed to the test reader
 * @throws Exception if creating, reading or closing the reader fails
 */
private void floatRead(boolean isDictionaryEncoding, Configuration conf) throws Exception {
    final VectorizedParquetRecordReader reader =
        createTestParquetReader("message test { required float float_field;}", conf);
    final VectorizedRowBatch batch = reader.createValue();
    try {
        int seen = 0;
        while (reader.next(NullWritable.get(), batch)) {
            final DoubleColumnVector column = (DoubleColumnVector) batch.cols[0];
            assertTrue(column.noNulls);
            // Walk the vector, stopping as soon as all expected elements have been checked.
            for (int idx = 0; idx < column.vector.length && seen != nElements; idx++, seen++) {
                assertEquals("Failed at " + seen, getFloatValue(isDictionaryEncoding, seen), column.vector[idx], 0);
                assertFalse(column.isNull[idx]);
            }
        }
        // Every expected element must have been visited.
        assertEquals(nElements, seen);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)

Aggregations

DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)101 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)58 Test (org.junit.Test)37 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)31 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)17 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)16 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)11 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)9 VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)9 Configuration (org.apache.hadoop.conf.Configuration)6 Random (java.util.Random)5 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)5 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)4 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)4 Timestamp (java.sql.Timestamp)3 StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector)3 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)2 ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 Output (org.apache.hadoop.hive.serde2.ByteStream.Output)2