Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class VectorizedPrimitiveColumnReader, method decodeDictionaryIds.
/**
 * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
 */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, LongColumnVector dictionaryIds) {
  System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
  if (column.noNulls) {
    column.noNulls = dictionaryIds.noNulls;
  }
  column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
  switch (descriptor.getType()) {
    case INT32:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] = dictionary.decodeToInt((int) dictionaryIds.vector[i]);
      }
      break;
    case INT64:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] = dictionary.decodeToLong((int) dictionaryIds.vector[i]);
      }
      break;
    case FLOAT:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] = dictionary.decodeToFloat((int) dictionaryIds.vector[i]);
      }
      break;
    case DOUBLE:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] = dictionary.decodeToDouble((int) dictionaryIds.vector[i]);
      }
      break;
    case INT96:
      final Calendar calendar;
      if (Strings.isNullOrEmpty(this.conversionTimeZone)) {
        // Local time should be used if no timezone is specified
        calendar = Calendar.getInstance();
      } else {
        calendar = Calendar.getInstance(TimeZone.getTimeZone(this.conversionTimeZone));
      }
      for (int i = rowId; i < rowId + num; ++i) {
        ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer();
        buf.order(ByteOrder.LITTLE_ENDIAN);
        long timeOfDayNanos = buf.getLong();
        int julianDay = buf.getInt();
        NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
        Timestamp ts = NanoTimeUtils.getTimestamp(nt, calendar);
        ((TimestampColumnVector) column).set(i, ts);
      }
      break;
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
      if (column instanceof BytesColumnVector) {
        for (int i = rowId; i < rowId + num; ++i) {
          ((BytesColumnVector) column).setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe());
        }
      } else {
        DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
        decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
        decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
        for (int i = rowId; i < rowId + num; ++i) {
          decimalColumnVector.vector[i].set(dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe(), decimalColumnVector.scale);
        }
      }
      break;
    default:
      throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
  }
}
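For context, the BINARY/FIXED_LEN_BYTE_ARRAY branch above copies each decoded dictionary entry into the output column with BytesColumnVector.setVal. Below is a minimal, self-contained sketch of that copy-in pattern outside the Parquet reader; the values array is hypothetical sample data standing in for dictionary.decodeToBinary(...) results, not something from the original code.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BytesColumnFillSketch {
  public static void main(String[] args) {
    // Hypothetical decoded values standing in for dictionary entries.
    byte[][] values = {
        "alpha".getBytes(StandardCharsets.UTF_8),
        "beta".getBytes(StandardCharsets.UTF_8),
        "gamma".getBytes(StandardCharsets.UTF_8)
    };
    BytesColumnVector column = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    // initBuffer() allocates the shared backing buffer that setVal copies into.
    column.initBuffer();
    for (int i = 0; i < values.length; i++) {
      // setVal copies the bytes into the vector's own buffer, unlike setRef which only aliases the caller's array.
      column.setVal(i, values[i], 0, values[i].length);
    }
    System.out.println("first value length: " + column.length[0]);
  }
}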
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class FilterStructColumnInList, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  final int logicalSize = batch.size;
  if (logicalSize == 0) {
    return;
  }
  if (buffer == null) {
    buffer = new Output();
    binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
  }
  for (VectorExpression ve : structExpressions) {
    ve.evaluate(batch);
  }
  BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
  try {
    boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    for (int logical = 0; logical < logicalSize; logical++) {
      int batchIndex = (selectedInUse ? selected[logical] : logical);
      binarySortableSerializeWrite.set(buffer);
      // Serialize the struct's field values for this row into the scratch buffer.
      for (int f = 0; f < structColumnMap.length; f++) {
        int fieldColumn = structColumnMap[f];
        ColumnVector colVec = batch.cols[fieldColumn];
        int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
        if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
          switch (fieldVectorColumnTypes[f]) {
            case BYTES:
              {
                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
                byte[] bytes = bytesColVec.vector[adjustedIndex];
                int start = bytesColVec.start[adjustedIndex];
                int length = bytesColVec.length[adjustedIndex];
                binarySortableSerializeWrite.writeString(bytes, start, length);
              }
              break;
            case LONG:
              binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DOUBLE:
              binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DECIMAL:
              DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
              binarySortableSerializeWrite.writeHiveDecimal(decColVector.vector[adjustedIndex], decColVector.scale);
              break;
            default:
              throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[f].name());
          }
        } else {
          binarySortableSerializeWrite.writeNull();
        }
      }
      scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
    }
    // Now, take the serialized keys we just wrote into our scratch column and look them
    // up in the IN list.
    super.evaluate(batch);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class StructColumnInList, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  final int logicalSize = batch.size;
  if (logicalSize == 0) {
    return;
  }
  if (buffer == null) {
    buffer = new Output();
    binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
  }
  for (VectorExpression ve : structExpressions) {
    ve.evaluate(batch);
  }
  BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
  try {
    boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    for (int logical = 0; logical < logicalSize; logical++) {
      int batchIndex = (selectedInUse ? selected[logical] : logical);
      binarySortableSerializeWrite.set(buffer);
      for (int f = 0; f < structColumnMap.length; f++) {
        int fieldColumn = structColumnMap[f];
        ColumnVector colVec = batch.cols[fieldColumn];
        int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
        if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
          switch (fieldVectorColumnTypes[f]) {
            case BYTES:
              {
                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
                byte[] bytes = bytesColVec.vector[adjustedIndex];
                int start = bytesColVec.start[adjustedIndex];
                int length = bytesColVec.length[adjustedIndex];
                binarySortableSerializeWrite.writeString(bytes, start, length);
              }
              break;
            case LONG:
              binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DOUBLE:
              binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DECIMAL:
              DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
              binarySortableSerializeWrite.writeHiveDecimal(decColVector.vector[adjustedIndex], decColVector.scale);
              break;
            default:
              throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[f].name());
          }
        } else {
          binarySortableSerializeWrite.writeNull();
        }
      }
      scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
    }
    // Now, take the serialized keys we just wrote into our scratch column and look them
    // up in the IN list.
    super.evaluate(batch);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
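Both struct-IN-list snippets above read a field out of a BytesColumnVector by combining its parallel vector/start/length arrays, clamping the index to 0 when the column is repeating and checking isNull only when noNulls is false. A small sketch of that read pattern in isolation follows; readField is an illustrative helper written for this page, not part of Hive.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

public class BytesColumnReadSketch {
  // Illustrative helper: return the row's value as a String, or null if the entry is null.
  static String readField(BytesColumnVector colVec, int batchIndex) {
    // A repeating vector stores its single value at index 0.
    int adjustedIndex = colVec.isRepeating ? 0 : batchIndex;
    if (!colVec.noNulls && colVec.isNull[adjustedIndex]) {
      return null;
    }
    // vector/start/length are parallel arrays describing a slice of a backing byte array.
    return new String(colVec.vector[adjustedIndex], colVec.start[adjustedIndex],
        colVec.length[adjustedIndex], StandardCharsets.UTF_8);
  }

  public static void main(String[] args) {
    BytesColumnVector colVec = new BytesColumnVector(4);
    colVec.initBuffer();
    colVec.noNulls = false;
    byte[] key = "key-0".getBytes(StandardCharsets.UTF_8);
    colVec.setVal(0, key, 0, key.length);
    colVec.isNull[1] = true;
    System.out.println(readField(colVec, 0)); // key-0
    System.out.println(readField(colVec, 1)); // null
  }
}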
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class VectorKeySeriesBytesSerialized, method processBatch.
@Override
public void processBatch(VectorizedRowBatch batch) throws IOException {
  currentBatchSize = batch.size;
  Preconditions.checkState(currentBatchSize > 0);
  BytesColumnVector bytesColVector = (BytesColumnVector) batch.cols[columnNum];
  byte[][] vectorBytesArrays = bytesColVector.vector;
  int[] vectorStarts = bytesColVector.start;
  int[] vectorLengths = bytesColVector.length;
  // The serialize routine uses this to build serializedKeyLengths.
  outputStartPosition = 0;
  output.reset();
  // A repeating vector means the whole batch shares one key, so it collapses to a single series.
  if (bytesColVector.isRepeating) {
    duplicateCounts[0] = currentBatchSize;
    if (bytesColVector.noNulls || !bytesColVector.isNull[0]) {
      seriesIsAllNull[0] = false;
      serialize(0, vectorBytesArrays[0], vectorStarts[0], vectorLengths[0]);
      nonNullKeyCount = 1;
    } else {
      seriesIsAllNull[0] = true;
      nonNullKeyCount = 0;
    }
    seriesCount = 1;
    Preconditions.checkState(seriesCount <= currentBatchSize);
  } else {
    seriesCount = 0;
    nonNullKeyCount = 0;
    // When selectedInUse is true, rows must be visited through the selected[] index array.
    if (batch.selectedInUse) {
      int[] selected = batch.selected;
      if (bytesColVector.noNulls) {
        duplicateCounts[0] = 1;
        int index;
        index = selected[0];
        byte[] prevKeyBytes = vectorBytesArrays[index];
        int prevKeyStart = vectorStarts[index];
        int prevKeyLength = vectorLengths[index];
        serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
        int currentKeyStart;
        int currentKeyLength;
        byte[] currentKeyBytes;
        for (int logical = 1; logical < currentBatchSize; logical++) {
          index = selected[logical];
          currentKeyBytes = vectorBytesArrays[index];
          currentKeyStart = vectorStarts[index];
          currentKeyLength = vectorLengths[index];
          if (StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
            duplicateCounts[seriesCount]++;
          } else {
            duplicateCounts[++seriesCount] = 1;
            serialize(seriesCount, currentKeyBytes, currentKeyStart, currentKeyLength);
            prevKeyBytes = currentKeyBytes;
            prevKeyStart = currentKeyStart;
            prevKeyLength = currentKeyLength;
          }
        }
        Arrays.fill(seriesIsAllNull, 0, ++seriesCount, false);
        nonNullKeyCount = seriesCount;
        Preconditions.checkState(seriesCount <= currentBatchSize);
      } else {
        boolean[] isNull = bytesColVector.isNull;
        boolean prevKeyIsNull;
        byte[] prevKeyBytes = null;
        int prevKeyStart = 0;
        int prevKeyLength = 0;
        duplicateCounts[0] = 1;
        int index = selected[0];
        if (isNull[index]) {
          seriesIsAllNull[0] = true;
          prevKeyIsNull = true;
        } else {
          seriesIsAllNull[0] = false;
          prevKeyIsNull = false;
          prevKeyBytes = vectorBytesArrays[index];
          prevKeyStart = vectorStarts[index];
          prevKeyLength = vectorLengths[index];
          serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
          nonNullKeyCount = 1;
        }
        int currentKeyStart;
        int currentKeyLength;
        byte[] currentKeyBytes;
        for (int logical = 1; logical < currentBatchSize; logical++) {
          index = selected[logical];
          if (isNull[index]) {
            if (prevKeyIsNull) {
              duplicateCounts[seriesCount]++;
            } else {
              duplicateCounts[++seriesCount] = 1;
              seriesIsAllNull[seriesCount] = true;
              prevKeyIsNull = true;
            }
          } else {
            currentKeyBytes = vectorBytesArrays[index];
            currentKeyStart = vectorStarts[index];
            currentKeyLength = vectorLengths[index];
            if (!prevKeyIsNull && StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
              duplicateCounts[seriesCount]++;
            } else {
              duplicateCounts[++seriesCount] = 1;
              seriesIsAllNull[seriesCount] = false;
              serialize(nonNullKeyCount++, currentKeyBytes, currentKeyStart, currentKeyLength);
              prevKeyIsNull = false;
              prevKeyBytes = currentKeyBytes;
              prevKeyStart = currentKeyStart;
              prevKeyLength = currentKeyLength;
            }
          }
        }
        seriesCount++;
        Preconditions.checkState(seriesCount <= currentBatchSize);
      }
    } else {
      if (bytesColVector.noNulls) {
        duplicateCounts[0] = 1;
        byte[] prevKeyBytes = vectorBytesArrays[0];
        int prevKeyStart = vectorStarts[0];
        int prevKeyLength = vectorLengths[0];
        serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
        int currentKeyStart;
        int currentKeyLength;
        byte[] currentKeyBytes;
        for (int index = 1; index < currentBatchSize; index++) {
          currentKeyBytes = vectorBytesArrays[index];
          currentKeyStart = vectorStarts[index];
          currentKeyLength = vectorLengths[index];
          if (StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
            duplicateCounts[seriesCount]++;
          } else {
            duplicateCounts[++seriesCount] = 1;
            serialize(seriesCount, currentKeyBytes, currentKeyStart, currentKeyLength);
            prevKeyBytes = currentKeyBytes;
            prevKeyStart = currentKeyStart;
            prevKeyLength = currentKeyLength;
          }
        }
        Arrays.fill(seriesIsAllNull, 0, ++seriesCount, false);
        nonNullKeyCount = seriesCount;
        Preconditions.checkState(seriesCount <= currentBatchSize);
      } else {
        boolean[] isNull = bytesColVector.isNull;
        boolean prevKeyIsNull;
        byte[] prevKeyBytes = null;
        int prevKeyStart = 0;
        int prevKeyLength = 0;
        duplicateCounts[0] = 1;
        if (isNull[0]) {
          seriesIsAllNull[0] = true;
          prevKeyIsNull = true;
        } else {
          seriesIsAllNull[0] = false;
          prevKeyIsNull = false;
          prevKeyBytes = vectorBytesArrays[0];
          prevKeyStart = vectorStarts[0];
          prevKeyLength = vectorLengths[0];
          serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
          nonNullKeyCount = 1;
        }
        byte[] currentKeyBytes;
        int currentKeyStart;
        int currentKeyLength;
        for (int index = 1; index < currentBatchSize; index++) {
          if (isNull[index]) {
            if (prevKeyIsNull) {
              duplicateCounts[seriesCount]++;
            } else {
              duplicateCounts[++seriesCount] = 1;
              seriesIsAllNull[seriesCount] = true;
              prevKeyIsNull = true;
            }
          } else {
            currentKeyBytes = vectorBytesArrays[index];
            currentKeyStart = vectorStarts[index];
            currentKeyLength = vectorLengths[index];
            if (!prevKeyIsNull && StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
              duplicateCounts[seriesCount]++;
            } else {
              duplicateCounts[++seriesCount] = 1;
              seriesIsAllNull[seriesCount] = false;
              serialize(nonNullKeyCount++, currentKeyBytes, currentKeyStart, currentKeyLength);
              prevKeyIsNull = false;
              prevKeyBytes = currentKeyBytes;
              prevKeyStart = currentKeyStart;
              prevKeyLength = currentKeyLength;
            }
          }
        }
        seriesCount++;
        Preconditions.checkState(seriesCount <= currentBatchSize);
      }
    }
  }
  // Finally.
  computeSerializedHashCodes();
  positionToFirst();
  Preconditions.checkState(validate());
}
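processBatch above collapses equal adjacent keys into series by walking the batch, either directly or through selected[], and comparing byte slices with StringExpr.equal. Below is a stripped-down sketch of that run-length grouping over a BytesColumnVector, ignoring nulls and the repeating/selected[] paths for brevity; countAdjacentRuns is an illustrative method written for this page, not Hive API.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;

public class KeySeriesRunSketch {
  // Illustrative: count runs of equal adjacent keys in rows [0, batchSize), assuming no nulls.
  static int countAdjacentRuns(BytesColumnVector keys, int batchSize) {
    int seriesCount = 1;
    for (int i = 1; i < batchSize; i++) {
      // StringExpr.equal compares two byte slices without materializing String objects.
      boolean sameAsPrev = StringExpr.equal(
          keys.vector[i - 1], keys.start[i - 1], keys.length[i - 1],
          keys.vector[i], keys.start[i], keys.length[i]);
      if (!sameAsPrev) {
        seriesCount++;
      }
    }
    return seriesCount;
  }

  public static void main(String[] args) {
    BytesColumnVector keys = new BytesColumnVector(8);
    keys.initBuffer();
    String[] data = { "a", "a", "b", "b", "b", "c" };
    for (int i = 0; i < data.length; i++) {
      byte[] bytes = data[i].getBytes(StandardCharsets.UTF_8);
      keys.setVal(i, bytes, 0, bytes.length);
    }
    System.out.println(countAdjacentRuns(keys, data.length)); // 3
  }
}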
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class TestVectorExpressionWriters, method testSetterText.
private void testSetterText(TypeInfo type) throws HiveException {
  Text t1 = new Text("alpha");
  Text t2 = new Text("beta");
  BytesColumnVector bcv = new BytesColumnVector(vectorSize);
  bcv.noNulls = false;
  bcv.initBuffer();
  bcv.setVal(0, t1.getBytes(), 0, t1.getLength());
  bcv.isNull[1] = true;
  bcv.setVal(2, t2.getBytes(), 0, t2.getLength());
  bcv.isNull[3] = true;
  bcv.setVal(4, t1.getBytes(), 0, t1.getLength());
  Object[] values = new Object[this.vectorSize];
  VectorExpressionWriter vew = getWriter(type);
  for (int i = 0; i < vectorSize; i++) {
    // setValue() should be able to handle null input
    values[i] = null;
    Writable w = (Writable) vew.setValue(values[i], bcv, i);
    if (w != null) {
      byte[] val = new byte[bcv.length[i]];
      System.arraycopy(bcv.vector[i], bcv.start[i], val, 0, bcv.length[i]);
      Writable expected = getWritableValue(type, val);
      Assert.assertEquals(expected, w);
    } else {
      Assert.assertTrue(bcv.isNull[i]);
    }
  }
}
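The test above shows the usual hand-built setup: size the vector, call initBuffer() before writing, set noNulls = false, populate rows with setVal, and mark null rows through isNull[]. The sketch below ties that together with the batch wiring the earlier snippets rely on (pulling the column out of batch.cols[...]); the column values are illustrative only.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BytesBatchRoundTripSketch {
  public static void main(String[] args) {
    // One-column batch whose column 0 is a BytesColumnVector, mirroring batch.cols[...] access above.
    VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
    BytesColumnVector col = new BytesColumnVector(4);
    batch.cols[0] = col;
    col.initBuffer();
    col.noNulls = false;
    byte[] alpha = "alpha".getBytes(StandardCharsets.UTF_8);
    col.setVal(0, alpha, 0, alpha.length);
    col.isNull[1] = true;
    batch.size = 2;
    // Read the rows back using the null flag plus the parallel vector/start/length arrays.
    for (int i = 0; i < batch.size; i++) {
      if (!col.noNulls && col.isNull[i]) {
        System.out.println("row " + i + ": NULL");
      } else {
        System.out.println("row " + i + ": "
            + new String(col.vector[i], col.start[i], col.length[i], StandardCharsets.UTF_8));
      }
    }
  }
}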