Search in sources :

Example 41 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

the class TestVectorizedORCReader method checkVectorizedReader.

private void checkVectorizedReader() throws Exception {
    Reader vreader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
    RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
    OrcStruct row = null;
    long lastRowNumber = -1;
    // Check Vectorized ORC reader against ORC row reader
    while (vrr.nextBatch(batch)) {
        Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber());
        for (int i = 0; i < batch.size; i++) {
            Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber() + i);
            lastRowNumber = rr.getRowNumber();
            row = (OrcStruct) rr.next(row);
            for (int j = 0; j < batch.cols.length; j++) {
                Object a = (row.getFieldValue(j));
                ColumnVector cv = batch.cols[j];
                // if the value is repeating, use row 0
                int rowId = cv.isRepeating ? 0 : i;
                // make sure the null flag agrees
                if (a == null) {
                    Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
                } else if (a instanceof BooleanWritable) {
                    // Boolean values are stores a 1's and 0's, so convert and compare
                    Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(temp.toString(), Long.toString(b));
                } else if (a instanceof TimestampWritableV2) {
                    // Timestamps are stored as long, so convert and compare
                    TimestampWritableV2 t = ((TimestampWritableV2) a);
                    TimestampColumnVector tcv = ((TimestampColumnVector) cv);
                    java.sql.Timestamp ts = tcv.asScratchTimestamp(rowId);
                    Assert.assertEquals(t.getTimestamp(), Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos()));
                } else if (a instanceof DateWritableV2) {
                    // Dates are stored as long, so convert and compare
                    DateWritableV2 adt = (DateWritableV2) a;
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(adt.get().toEpochMilli(), DateWritableV2.daysToMillis((int) b));
                } else if (a instanceof HiveDecimalWritable) {
                    // Decimals are stored as BigInteger, so convert and compare
                    HiveDecimalWritable dec = (HiveDecimalWritable) a;
                    HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[i];
                    Assert.assertEquals(dec, b);
                } else if (a instanceof DoubleWritable) {
                    double b = ((DoubleColumnVector) cv).vector[rowId];
                    assertEquals(a.toString(), Double.toString(b));
                } else if (a instanceof Text) {
                    BytesColumnVector bcv = (BytesColumnVector) cv;
                    Text b = new Text();
                    b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
                    assertEquals(a, b);
                } else if (a instanceof IntWritable || a instanceof LongWritable || a instanceof ByteWritable || a instanceof ShortWritable) {
                    assertEquals(a.toString(), Long.toString(((LongColumnVector) cv).vector[rowId]));
                } else {
                    assertEquals("huh", a.getClass().getName());
                }
            }
        }
        // Check repeating
        Assert.assertEquals(false, batch.cols[0].isRepeating);
        Assert.assertEquals(false, batch.cols[1].isRepeating);
        Assert.assertEquals(false, batch.cols[2].isRepeating);
        Assert.assertEquals(true, batch.cols[3].isRepeating);
        Assert.assertEquals(false, batch.cols[4].isRepeating);
        Assert.assertEquals(false, batch.cols[5].isRepeating);
        Assert.assertEquals(false, batch.cols[6].isRepeating);
        Assert.assertEquals(false, batch.cols[7].isRepeating);
        Assert.assertEquals(false, batch.cols[8].isRepeating);
        Assert.assertEquals(false, batch.cols[9].isRepeating);
        // Check non null
        Assert.assertEquals(false, batch.cols[0].noNulls);
        Assert.assertEquals(false, batch.cols[1].noNulls);
        Assert.assertEquals(true, batch.cols[2].noNulls);
        Assert.assertEquals(true, batch.cols[3].noNulls);
        Assert.assertEquals(false, batch.cols[4].noNulls);
        Assert.assertEquals(false, batch.cols[5].noNulls);
        Assert.assertEquals(false, batch.cols[6].noNulls);
        Assert.assertEquals(false, batch.cols[7].noNulls);
        Assert.assertEquals(false, batch.cols[8].noNulls);
        Assert.assertEquals(false, batch.cols[9].noNulls);
    }
    Assert.assertEquals(false, rr.nextBatch(batch));
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IntWritable(org.apache.hadoop.io.IntWritable) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) Text(org.apache.hadoop.io.Text) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BooleanWritable(org.apache.hadoop.io.BooleanWritable)

Example 42 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

the class VectorizedColumnReaderTestBase method decimalRead.

protected void decimalRead(boolean isDictionaryEncoding) throws Exception {
    Configuration readerConf = new Configuration();
    readerConf.set(IOConstants.COLUMNS, "value");
    readerConf.set(IOConstants.COLUMNS_TYPES, "decimal(5,2)");
    readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
    VectorizedParquetRecordReader reader = createTestParquetReader("message hive_schema { required value (DECIMAL(5,2));}", readerConf);
    VectorizedRowBatch previous = reader.createValue();
    try {
        int c = 0;
        while (reader.next(NullWritable.get(), previous)) {
            DecimalColumnVector vector = (DecimalColumnVector) previous.cols[0];
            assertTrue(vector.noNulls);
            for (int i = 0; i < vector.vector.length; i++) {
                if (c == nElements) {
                    break;
                }
                assertEquals("Check failed at pos " + c, getDecimal(isDictionaryEncoding, c), vector.vector[i].getHiveDecimal());
                assertFalse(vector.isNull[i]);
                c++;
            }
        }
        assertEquals(nElements, c);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) Configuration(org.apache.hadoop.conf.Configuration) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)

Example 43 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

the class StructColumnInList method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
    final int logicalSize = batch.size;
    if (logicalSize == 0) {
        return;
    }
    if (buffer == null) {
        buffer = new Output();
        binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
    }
    for (VectorExpression ve : structExpressions) {
        ve.evaluate(batch);
    }
    BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
    try {
        boolean selectedInUse = batch.selectedInUse;
        int[] selected = batch.selected;
        for (int logical = 0; logical < logicalSize; logical++) {
            int batchIndex = (selectedInUse ? selected[logical] : logical);
            binarySortableSerializeWrite.set(buffer);
            for (int f = 0; f < structColumnMap.length; f++) {
                int fieldColumn = structColumnMap[f];
                ColumnVector colVec = batch.cols[fieldColumn];
                int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
                if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
                    switch(fieldVectorColumnTypes[f]) {
                        case BYTES:
                            {
                                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
                                byte[] bytes = bytesColVec.vector[adjustedIndex];
                                int start = bytesColVec.start[adjustedIndex];
                                int length = bytesColVec.length[adjustedIndex];
                                binarySortableSerializeWrite.writeString(bytes, start, length);
                            }
                            break;
                        case LONG:
                            binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
                            break;
                        case DOUBLE:
                            binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
                            break;
                        case DECIMAL:
                            DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
                            binarySortableSerializeWrite.writeHiveDecimal(decColVector.vector[adjustedIndex], decColVector.scale);
                            break;
                        default:
                            throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[f].name());
                    }
                } else {
                    binarySortableSerializeWrite.writeNull();
                }
            }
            scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
        }
        // Now, take the serialized keys we just wrote into our scratch column and look them
        // up in the IN list.
        super.evaluate(batch);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 44 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

the class VectorUDFMapIndexDecimalCol method findInMap.

@Override
public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex, MapColumnVector mapColumnVector, int mapBatchIndex) {
    final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
    final int count = (int) mapColumnVector.lengths[mapBatchIndex];
    HiveDecimalWritable[] keys = ((DecimalColumnVector) mapColumnVector.keys).vector;
    final HiveDecimalWritable index = ((DecimalColumnVector) indexColumnVector).vector[indexBatchIndex];
    for (int i = 0; i < count; i++) {
        if (index.compareTo(keys[offset + i]) == 0) {
            return offset + i;
        }
    }
    return -1;
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)

Example 45 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

the class VectorUDAFAvgDecimal method aggregateInputSelection.

@Override
public void aggregateInputSelection(VectorAggregationBufferRow[] aggregationBufferSets, int bufferIndex, VectorizedRowBatch batch) throws HiveException {
    int batchSize = batch.size;
    if (batchSize == 0) {
        return;
    }
    inputExpression.evaluate(batch);
    DecimalColumnVector inputVector = (DecimalColumnVector) batch.cols[this.inputExpression.getOutputColumn()];
    HiveDecimalWritable[] vector = inputVector.vector;
    if (inputVector.noNulls) {
        if (inputVector.isRepeating) {
            iterateNoNullsRepeatingWithAggregationSelection(aggregationBufferSets, bufferIndex, vector[0], batchSize);
        } else {
            if (batch.selectedInUse) {
                iterateNoNullsSelectionWithAggregationSelection(aggregationBufferSets, bufferIndex, vector, batch.selected, batchSize);
            } else {
                iterateNoNullsWithAggregationSelection(aggregationBufferSets, bufferIndex, vector, batchSize);
            }
        }
    } else {
        if (inputVector.isRepeating) {
            if (batch.selectedInUse) {
                iterateHasNullsRepeatingSelectionWithAggregationSelection(aggregationBufferSets, bufferIndex, vector[0], batchSize, batch.selected, inputVector.isNull);
            } else {
                iterateHasNullsRepeatingWithAggregationSelection(aggregationBufferSets, bufferIndex, vector[0], batchSize, inputVector.isNull);
            }
        } else {
            if (batch.selectedInUse) {
                iterateHasNullsSelectionWithAggregationSelection(aggregationBufferSets, bufferIndex, vector, batchSize, batch.selected, inputVector.isNull);
            } else {
                iterateHasNullsWithAggregationSelection(aggregationBufferSets, bufferIndex, vector, batchSize, inputVector.isNull);
            }
        }
    }
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)

Aggregations

DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)108 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)38 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)28 Test (org.junit.Test)28 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)27 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)25 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)25 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)23 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)18 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)16 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)14 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)7 Timestamp (java.sql.Timestamp)5 Random (java.util.Random)4 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)4 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)3 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)3 IOException (java.io.IOException)2 DateColumnVector (org.apache.hadoop.hive.ql.exec.vector.DateColumnVector)2 Decimal64ColumnVector (org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector)2