Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class VectorizedPrimitiveColumnReader, method decodeDictionaryIds.
/**
 * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
 */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, LongColumnVector dictionaryIds) {
  System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
  if (column.noNulls) {
    column.noNulls = dictionaryIds.noNulls;
  }
  column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
  switch (descriptor.getType()) {
    case INT32:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] = dictionary.decodeToInt((int) dictionaryIds.vector[i]);
      }
      break;
    case INT64:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] = dictionary.decodeToLong((int) dictionaryIds.vector[i]);
      }
      break;
    case FLOAT:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] = dictionary.decodeToFloat((int) dictionaryIds.vector[i]);
      }
      break;
    case DOUBLE:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] = dictionary.decodeToDouble((int) dictionaryIds.vector[i]);
      }
      break;
    case INT96:
      final Calendar calendar;
      if (Strings.isNullOrEmpty(this.conversionTimeZone)) {
        // Local time should be used if no timezone is specified
        calendar = Calendar.getInstance();
      } else {
        calendar = Calendar.getInstance(TimeZone.getTimeZone(this.conversionTimeZone));
      }
      for (int i = rowId; i < rowId + num; ++i) {
        ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer();
        buf.order(ByteOrder.LITTLE_ENDIAN);
        long timeOfDayNanos = buf.getLong();
        int julianDay = buf.getInt();
        NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
        Timestamp ts = NanoTimeUtils.getTimestamp(nt, calendar);
        ((TimestampColumnVector) column).set(i, ts);
      }
      break;
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
      if (column instanceof BytesColumnVector) {
        for (int i = rowId; i < rowId + num; ++i) {
          ((BytesColumnVector) column).setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe());
        }
      } else {
        DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
        decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
        decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
        for (int i = rowId; i < rowId + num; ++i) {
          decimalColumnVector.vector[i].set(dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe(), decimalColumnVector.scale);
        }
      }
      break;
    default:
      throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
  }
}
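For context, the BINARY/FIXED_LEN_BYTE_ARRAY branch above copies each decoded dictionary entry into the output column with BytesColumnVector.setVal. Below is a minimal, self-contained sketch of that copy-in pattern outside the Parquet reader; the values array is hypothetical sample data standing in for dictionary.decodeToBinary(...) results, not something from the original code.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BytesColumnFillSketch {
  public static void main(String[] args) {
    // Hypothetical decoded values standing in for dictionary entries.
    byte[][] values = {
        "alpha".getBytes(StandardCharsets.UTF_8),
        "beta".getBytes(StandardCharsets.UTF_8),
        "gamma".getBytes(StandardCharsets.UTF_8)
    };
    BytesColumnVector column = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    // initBuffer() allocates the shared backing buffer that setVal copies into.
    column.initBuffer();
    for (int i = 0; i < values.length; i++) {
      // setVal copies the bytes into the vector's own buffer, unlike setRef which only aliases the caller's array.
      column.setVal(i, values[i], 0, values[i].length);
    }
    System.out.println("first value length: " + column.length[0]);
  }
}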
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class FilterStructColumnInList, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  final int logicalSize = batch.size;
  if (logicalSize == 0) {
    return;
  }
  if (buffer == null) {
    buffer = new Output();
    binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
  }
  for (VectorExpression ve : structExpressions) {
    ve.evaluate(batch);
  }
  BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
  try {
    boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    for (int logical = 0; logical < logicalSize; logical++) {
      int batchIndex = (selectedInUse ? selected[logical] : logical);
      binarySortableSerializeWrite.set(buffer);
      // Serialize the struct's field values for this row into the scratch buffer.
      for (int f = 0; f < structColumnMap.length; f++) {
        int fieldColumn = structColumnMap[f];
        ColumnVector colVec = batch.cols[fieldColumn];
        int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
        if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
          switch (fieldVectorColumnTypes[f]) {
            case BYTES:
              {
                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
                byte[] bytes = bytesColVec.vector[adjustedIndex];
                int start = bytesColVec.start[adjustedIndex];
                int length = bytesColVec.length[adjustedIndex];
                binarySortableSerializeWrite.writeString(bytes, start, length);
              }
              break;
            case LONG:
              binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DOUBLE:
              binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DECIMAL:
              DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
              binarySortableSerializeWrite.writeHiveDecimal(decColVector.vector[adjustedIndex], decColVector.scale);
              break;
            default:
              throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[f].name());
          }
        } else {
          binarySortableSerializeWrite.writeNull();
        }
      }
      scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
    }
    // Now, take the serialized keys we just wrote into our scratch column and look them
    // up in the IN list.
    super.evaluate(batch);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class StructColumnInList, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  final int logicalSize = batch.size;
  if (logicalSize == 0) {
    return;
  }
  if (buffer == null) {
    buffer = new Output();
    binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
  }
  for (VectorExpression ve : structExpressions) {
    ve.evaluate(batch);
  }
  BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
  try {
    boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    for (int logical = 0; logical < logicalSize; logical++) {
      int batchIndex = (selectedInUse ? selected[logical] : logical);
      binarySortableSerializeWrite.set(buffer);
      for (int f = 0; f < structColumnMap.length; f++) {
        int fieldColumn = structColumnMap[f];
        ColumnVector colVec = batch.cols[fieldColumn];
        int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
        if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
          switch (fieldVectorColumnTypes[f]) {
            case BYTES:
              {
                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
                byte[] bytes = bytesColVec.vector[adjustedIndex];
                int start = bytesColVec.start[adjustedIndex];
                int length = bytesColVec.length[adjustedIndex];
                binarySortableSerializeWrite.writeString(bytes, start, length);
              }
              break;
            case LONG:
              binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DOUBLE:
              binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DECIMAL:
              DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
              binarySortableSerializeWrite.writeHiveDecimal(decColVector.vector[adjustedIndex], decColVector.scale);
              break;
            default:
              throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[f].name());
          }
        } else {
          binarySortableSerializeWrite.writeNull();
        }
      }
      scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
    }
    // Now, take the serialized keys we just wrote into our scratch column and look them
    // up in the IN list.
    super.evaluate(batch);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
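Both struct-IN-list snippets above read a field out of a BytesColumnVector by combining its parallel vector/start/length arrays, clamping the index to 0 when the column is repeating and checking isNull only when noNulls is false. A small sketch of that read pattern in isolation follows; readField is an illustrative helper written for this page, not part of Hive.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

public class BytesColumnReadSketch {
  // Illustrative helper: return the row's value as a String, or null if the entry is null.
  static String readField(BytesColumnVector colVec, int batchIndex) {
    // A repeating vector stores its single value at index 0.
    int adjustedIndex = colVec.isRepeating ? 0 : batchIndex;
    if (!colVec.noNulls && colVec.isNull[adjustedIndex]) {
      return null;
    }
    // vector/start/length are parallel arrays describing a slice of a backing byte array.
    return new String(colVec.vector[adjustedIndex], colVec.start[adjustedIndex],
        colVec.length[adjustedIndex], StandardCharsets.UTF_8);
  }

  public static void main(String[] args) {
    BytesColumnVector colVec = new BytesColumnVector(4);
    colVec.initBuffer();
    colVec.noNulls = false;
    byte[] key = "key-0".getBytes(StandardCharsets.UTF_8);
    colVec.setVal(0, key, 0, key.length);
    colVec.isNull[1] = true;
    System.out.println(readField(colVec, 0)); // key-0
    System.out.println(readField(colVec, 1)); // null
  }
}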
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class VectorKeySeriesBytesSerialized, method processBatch.
@Override
public void processBatch(VectorizedRowBatch batch) throws IOException {
  currentBatchSize = batch.size;
  Preconditions.checkState(currentBatchSize > 0);
  BytesColumnVector bytesColVector = (BytesColumnVector) batch.cols[columnNum];
  byte[][] vectorBytesArrays = bytesColVector.vector;
  int[] vectorStarts = bytesColVector.start;
  int[] vectorLengths = bytesColVector.length;
  // The serialize routine uses this to build serializedKeyLengths.
  outputStartPosition = 0;
  output.reset();
  // A repeating vector means the whole batch shares one key, so it collapses to a single series.
  if (bytesColVector.isRepeating) {
    duplicateCounts[0] = currentBatchSize;
    if (bytesColVector.noNulls || !bytesColVector.isNull[0]) {
      seriesIsAllNull[0] = false;
      serialize(0, vectorBytesArrays[0], vectorStarts[0], vectorLengths[0]);
      nonNullKeyCount = 1;
    } else {
      seriesIsAllNull[0] = true;
      nonNullKeyCount = 0;
    }
    seriesCount = 1;
    Preconditions.checkState(seriesCount <= currentBatchSize);
  } else {
    seriesCount = 0;
    nonNullKeyCount = 0;
    // When selectedInUse is true, rows must be visited through the selected[] index array.
    if (batch.selectedInUse) {
      int[] selected = batch.selected;
      if (bytesColVector.noNulls) {
        duplicateCounts[0] = 1;
        int index;
        index = selected[0];
        byte[] prevKeyBytes = vectorBytesArrays[index];
        int prevKeyStart = vectorStarts[index];
        int prevKeyLength = vectorLengths[index];
        serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
        int currentKeyStart;
        int currentKeyLength;
        byte[] currentKeyBytes;
        for (int logical = 1; logical < currentBatchSize; logical++) {
          index = selected[logical];
          currentKeyBytes = vectorBytesArrays[index];
          currentKeyStart = vectorStarts[index];
          currentKeyLength = vectorLengths[index];
          if (StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
            duplicateCounts[seriesCount]++;
          } else {
            duplicateCounts[++seriesCount] = 1;
            serialize(seriesCount, currentKeyBytes, currentKeyStart, currentKeyLength);
            prevKeyBytes = currentKeyBytes;
            prevKeyStart = currentKeyStart;
            prevKeyLength = currentKeyLength;
          }
        }
        Arrays.fill(seriesIsAllNull, 0, ++seriesCount, false);
        nonNullKeyCount = seriesCount;
        Preconditions.checkState(seriesCount <= currentBatchSize);
      } else {
        boolean[] isNull = bytesColVector.isNull;
        boolean prevKeyIsNull;
        byte[] prevKeyBytes = null;
        int prevKeyStart = 0;
        int prevKeyLength = 0;
        duplicateCounts[0] = 1;
        int index = selected[0];
        if (isNull[index]) {
          seriesIsAllNull[0] = true;
          prevKeyIsNull = true;
        } else {
          seriesIsAllNull[0] = false;
          prevKeyIsNull = false;
          prevKeyBytes = vectorBytesArrays[index];
          prevKeyStart = vectorStarts[index];
          prevKeyLength = vectorLengths[index];
          serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
          nonNullKeyCount = 1;
        }
        int currentKeyStart;
        int currentKeyLength;
        byte[] currentKeyBytes;
        for (int logical = 1; logical < currentBatchSize; logical++) {
          index = selected[logical];
          if (isNull[index]) {
            if (prevKeyIsNull) {
              duplicateCounts[seriesCount]++;
            } else {
              duplicateCounts[++seriesCount] = 1;
              seriesIsAllNull[seriesCount] = true;
              prevKeyIsNull = true;
            }
          } else {
            currentKeyBytes = vectorBytesArrays[index];
            currentKeyStart = vectorStarts[index];
            currentKeyLength = vectorLengths[index];
            if (!prevKeyIsNull && StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
              duplicateCounts[seriesCount]++;
            } else {
              duplicateCounts[++seriesCount] = 1;
              seriesIsAllNull[seriesCount] = false;
              serialize(nonNullKeyCount++, currentKeyBytes, currentKeyStart, currentKeyLength);
              prevKeyIsNull = false;
              prevKeyBytes = currentKeyBytes;
              prevKeyStart = currentKeyStart;
              prevKeyLength = currentKeyLength;
            }
          }
        }
        seriesCount++;
        Preconditions.checkState(seriesCount <= currentBatchSize);
      }
    } else {
      if (bytesColVector.noNulls) {
        duplicateCounts[0] = 1;
        byte[] prevKeyBytes = vectorBytesArrays[0];
        int prevKeyStart = vectorStarts[0];
        int prevKeyLength = vectorLengths[0];
        serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
        int currentKeyStart;
        int currentKeyLength;
        byte[] currentKeyBytes;
        for (int index = 1; index < currentBatchSize; index++) {
          currentKeyBytes = vectorBytesArrays[index];
          currentKeyStart = vectorStarts[index];
          currentKeyLength = vectorLengths[index];
          if (StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
            duplicateCounts[seriesCount]++;
          } else {
            duplicateCounts[++seriesCount] = 1;
            serialize(seriesCount, currentKeyBytes, currentKeyStart, currentKeyLength);
            prevKeyBytes = currentKeyBytes;
            prevKeyStart = currentKeyStart;
            prevKeyLength = currentKeyLength;
          }
        }
        Arrays.fill(seriesIsAllNull, 0, ++seriesCount, false);
        nonNullKeyCount = seriesCount;
        Preconditions.checkState(seriesCount <= currentBatchSize);
      } else {
        boolean[] isNull = bytesColVector.isNull;
        boolean prevKeyIsNull;
        byte[] prevKeyBytes = null;
        int prevKeyStart = 0;
        int prevKeyLength = 0;
        duplicateCounts[0] = 1;
        if (isNull[0]) {
          seriesIsAllNull[0] = true;
          prevKeyIsNull = true;
        } else {
          seriesIsAllNull[0] = false;
          prevKeyIsNull = false;
          prevKeyBytes = vectorBytesArrays[0];
          prevKeyStart = vectorStarts[0];
          prevKeyLength = vectorLengths[0];
          serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
          nonNullKeyCount = 1;
        }
        byte[] currentKeyBytes;
        int currentKeyStart;
        int currentKeyLength;
        for (int index = 1; index < currentBatchSize; index++) {
          if (isNull[index]) {
            if (prevKeyIsNull) {
              duplicateCounts[seriesCount]++;
            } else {
              duplicateCounts[++seriesCount] = 1;
              seriesIsAllNull[seriesCount] = true;
              prevKeyIsNull = true;
            }
          } else {
            currentKeyBytes = vectorBytesArrays[index];
            currentKeyStart = vectorStarts[index];
            currentKeyLength = vectorLengths[index];
            if (!prevKeyIsNull && StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
              duplicateCounts[seriesCount]++;
            } else {
              duplicateCounts[++seriesCount] = 1;
              seriesIsAllNull[seriesCount] = false;
              serialize(nonNullKeyCount++, currentKeyBytes, currentKeyStart, currentKeyLength);
              prevKeyIsNull = false;
              prevKeyBytes = currentKeyBytes;
              prevKeyStart = currentKeyStart;
              prevKeyLength = currentKeyLength;
            }
          }
        }
        seriesCount++;
        Preconditions.checkState(seriesCount <= currentBatchSize);
      }
    }
  }
  // Finally.
  computeSerializedHashCodes();
  positionToFirst();
  Preconditions.checkState(validate());
}
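processBatch above collapses equal adjacent keys into series by walking the batch, either directly or through selected[], and comparing byte slices with StringExpr.equal. Below is a stripped-down sketch of that run-length grouping over a BytesColumnVector, ignoring nulls and the repeating/selected[] paths for brevity; countAdjacentRuns is an illustrative method written for this page, not Hive API.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;

public class KeySeriesRunSketch {
  // Illustrative: count runs of equal adjacent keys in rows [0, batchSize), assuming no nulls.
  static int countAdjacentRuns(BytesColumnVector keys, int batchSize) {
    int seriesCount = 1;
    for (int i = 1; i < batchSize; i++) {
      // StringExpr.equal compares two byte slices without materializing String objects.
      boolean sameAsPrev = StringExpr.equal(
          keys.vector[i - 1], keys.start[i - 1], keys.length[i - 1],
          keys.vector[i], keys.start[i], keys.length[i]);
      if (!sameAsPrev) {
        seriesCount++;
      }
    }
    return seriesCount;
  }

  public static void main(String[] args) {
    BytesColumnVector keys = new BytesColumnVector(8);
    keys.initBuffer();
    String[] data = { "a", "a", "b", "b", "b", "c" };
    for (int i = 0; i < data.length; i++) {
      byte[] bytes = data[i].getBytes(StandardCharsets.UTF_8);
      keys.setVal(i, bytes, 0, bytes.length);
    }
    System.out.println(countAdjacentRuns(keys, data.length)); // 3
  }
}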
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class TestVectorExpressionWriters, method testSetterText.
private void testSetterText(TypeInfo type) throws HiveException {
  Text t1 = new Text("alpha");
  Text t2 = new Text("beta");
  BytesColumnVector bcv = new BytesColumnVector(vectorSize);
  bcv.noNulls = false;
  bcv.initBuffer();
  bcv.setVal(0, t1.getBytes(), 0, t1.getLength());
  bcv.isNull[1] = true;
  bcv.setVal(2, t2.getBytes(), 0, t2.getLength());
  bcv.isNull[3] = true;
  bcv.setVal(4, t1.getBytes(), 0, t1.getLength());
  Object[] values = new Object[this.vectorSize];
  VectorExpressionWriter vew = getWriter(type);
  for (int i = 0; i < vectorSize; i++) {
    // setValue() should be able to handle null input
    values[i] = null;
    Writable w = (Writable) vew.setValue(values[i], bcv, i);
    if (w != null) {
      byte[] val = new byte[bcv.length[i]];
      System.arraycopy(bcv.vector[i], bcv.start[i], val, 0, bcv.length[i]);
      Writable expected = getWritableValue(type, val);
      Assert.assertEquals(expected, w);
    } else {
      Assert.assertTrue(bcv.isNull[i]);
    }
  }
}
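The test above shows the usual hand-built setup: size the vector, call initBuffer() before writing, set noNulls = false, populate rows with setVal, and mark null rows through isNull[]. The sketch below ties that together with the batch wiring the earlier snippets rely on (pulling the column out of batch.cols[...]); the column values are illustrative only.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BytesBatchRoundTripSketch {
  public static void main(String[] args) {
    // One-column batch whose column 0 is a BytesColumnVector, mirroring batch.cols[...] access above.
    VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
    BytesColumnVector col = new BytesColumnVector(4);
    batch.cols[0] = col;
    col.initBuffer();
    col.noNulls = false;
    byte[] alpha = "alpha".getBytes(StandardCharsets.UTF_8);
    col.setVal(0, alpha, 0, alpha.length);
    col.isNull[1] = true;
    batch.size = 2;
    // Read the rows back using the null flag plus the parallel vector/start/length arrays.
    for (int i = 0; i < batch.size; i++) {
      if (!col.noNulls && col.isNull[i]) {
        System.out.println("row " + i + ": NULL");
      } else {
        System.out.println("row " + i + ": "
            + new String(col.vector[i], col.start[i], col.length[i], StandardCharsets.UTF_8));
      }
    }
  }
}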