use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class VectorUDFTimestampFieldString method evaluate.
/**
 * Fills the long output column by calling {@code getField} on the bytes of every
 * processed row of the input string column.
 *
 * <p>Rows that are null in the input stay null in the output and are not passed to
 * {@code getField}. Rows for which {@code getField} throws {@link ParseException}
 * are marked null in the output as well.
 *
 * @param batch the batch to process; its output column is updated in place
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }

  final LongColumnVector result = (LongColumnVector) batch.cols[outputColumn];
  final BytesColumnVector source = (BytesColumnVector) batch.cols[this.colNum];

  // Nothing to compute for an empty batch; the output column is left untouched.
  if (batch.size == 0) {
    return;
  }

  // A repeating input is fully described by entry 0, so a single row is evaluated
  // and the selection vector is irrelevant.
  final boolean repeating = source.isRepeating;
  final int rowCount = repeating ? 1 : batch.size;
  final boolean viaSelection = !repeating && batch.selectedInUse;
  final int[] selection = batch.selected;

  // The output repeats exactly when the input does.
  result.isRepeating = repeating;

  // Start from the input's null state; a parse failure below may still clear noNulls.
  final boolean inputHasNoNulls = source.noNulls;
  result.noNulls = inputHasNoNulls;

  for (int pos = 0; pos < rowCount; pos++) {
    final int row = viaSelection ? selection[pos] : pos;

    if (!inputHasNoNulls) {
      // Copy the null flag and skip the (potentially expensive) call for null rows.
      result.isNull[row] = source.isNull[row];
      if (source.isNull[row]) {
        continue;
      }
    }

    try {
      result.vector[row] = getField(source.vector[row], source.start[row], source.length[row]);
      if (inputHasNoNulls) {
        result.isNull[row] = false;
      }
    } catch (ParseException e) {
      // An unparsable value yields a null result for this row.
      result.noNulls = false;
      result.isNull[row] = true;
    }
  }
}
use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class VectorKeySeriesBytesSerialized method processBatch.
/**
 * Scans the key column of the batch and groups consecutive rows that carry the same key
 * into "series".
 *
 * <p>For every series the run length is stored in {@code duplicateCounts} and whether the
 * key is NULL is stored in {@code seriesIsAllNull}. The first key of every non-null series
 * is handed to {@code serialize}. On return {@code seriesCount} holds the number of series
 * and {@code nonNullKeyCount} the number of serialized (non-null) keys.
 *
 * <p>The four inner branches (selected / not selected, no nulls / with nulls) implement the
 * same run detection; they differ only in how a row index is obtained and in whether null
 * flags have to be consulted.
 *
 * @param batch the batch to process; it must contain at least one row (enforced with
 *     {@code Preconditions.checkState})
 * @throws IOException declared by this method; presumably raised while serializing keys
 *     (the throwing call is not visible here -- confirm against {@code serialize})
 */
@Override
public void processBatch(VectorizedRowBatch batch) throws IOException {
currentBatchSize = batch.size;
Preconditions.checkState(currentBatchSize > 0);
BytesColumnVector bytesColVector = (BytesColumnVector) batch.cols[columnNum];
byte[][] vectorBytesArrays = bytesColVector.vector;
int[] vectorStarts = bytesColVector.start;
int[] vectorLengths = bytesColVector.length;
// The serialize routine uses this to build serializedKeyLengths.
outputStartPosition = 0;
output.reset();
if (bytesColVector.isRepeating) {
// Repeating column: entry 0 stands for every row, so the whole batch is one series.
duplicateCounts[0] = currentBatchSize;
if (bytesColVector.noNulls || !bytesColVector.isNull[0]) {
seriesIsAllNull[0] = false;
serialize(0, vectorBytesArrays[0], vectorStarts[0], vectorLengths[0]);
nonNullKeyCount = 1;
} else {
// The single repeated key is NULL; nothing is serialized.
seriesIsAllNull[0] = true;
nonNullKeyCount = 0;
}
seriesCount = 1;
Preconditions.checkState(seriesCount <= currentBatchSize);
} else {
// Inside the loops below seriesCount is the INDEX of the series currently being extended;
// it is turned into a count by the increment after each loop.
seriesCount = 0;
nonNullKeyCount = 0;
if (batch.selectedInUse) {
// Rows are addressed indirectly through the selection vector.
int[] selected = batch.selected;
if (bytesColVector.noNulls) {
// No nulls: every series has a serialized key, so the series index is also used as
// the index passed to serialize().
duplicateCounts[0] = 1;
int index;
index = selected[0];
byte[] prevKeyBytes = vectorBytesArrays[index];
int prevKeyStart = vectorStarts[index];
int prevKeyLength = vectorLengths[index];
serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
int currentKeyStart;
int currentKeyLength;
byte[] currentKeyBytes;
for (int logical = 1; logical < currentBatchSize; logical++) {
index = selected[logical];
currentKeyBytes = vectorBytesArrays[index];
currentKeyStart = vectorStarts[index];
currentKeyLength = vectorLengths[index];
if (StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
// Same key as the previous row: extend the current series.
duplicateCounts[seriesCount]++;
} else {
// Key changed: open a new series and serialize its key.
duplicateCounts[++seriesCount] = 1;
serialize(seriesCount, currentKeyBytes, currentKeyStart, currentKeyLength);
prevKeyBytes = currentKeyBytes;
prevKeyStart = currentKeyStart;
prevKeyLength = currentKeyLength;
}
}
// The pre-increment converts the last series index into the series count.
Arrays.fill(seriesIsAllNull, 0, ++seriesCount, false);
nonNullKeyCount = seriesCount;
Preconditions.checkState(seriesCount <= currentBatchSize);
} else {
// Nulls possible: NULL keys form their own series but are never serialized, so
// nonNullKeyCount (not seriesCount) is the index passed to serialize().
boolean[] isNull = bytesColVector.isNull;
boolean prevKeyIsNull;
byte[] prevKeyBytes = null;
int prevKeyStart = 0;
int prevKeyLength = 0;
duplicateCounts[0] = 1;
int index = selected[0];
if (isNull[index]) {
seriesIsAllNull[0] = true;
prevKeyIsNull = true;
} else {
seriesIsAllNull[0] = false;
prevKeyIsNull = false;
prevKeyBytes = vectorBytesArrays[index];
prevKeyStart = vectorStarts[index];
prevKeyLength = vectorLengths[index];
serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
nonNullKeyCount = 1;
}
int currentKeyStart;
int currentKeyLength;
byte[] currentKeyBytes;
for (int logical = 1; logical < currentBatchSize; logical++) {
index = selected[logical];
if (isNull[index]) {
if (prevKeyIsNull) {
// Consecutive NULL keys belong to the same series.
duplicateCounts[seriesCount]++;
} else {
// Transition from a non-null key to NULL: open a new all-null series.
duplicateCounts[++seriesCount] = 1;
seriesIsAllNull[seriesCount] = true;
prevKeyIsNull = true;
}
} else {
currentKeyBytes = vectorBytesArrays[index];
currentKeyStart = vectorStarts[index];
currentKeyLength = vectorLengths[index];
// The previous key bytes are only meaningful when the previous key was not NULL.
if (!prevKeyIsNull && StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
duplicateCounts[seriesCount]++;
} else {
duplicateCounts[++seriesCount] = 1;
seriesIsAllNull[seriesCount] = false;
serialize(nonNullKeyCount++, currentKeyBytes, currentKeyStart, currentKeyLength);
prevKeyIsNull = false;
prevKeyBytes = currentKeyBytes;
prevKeyStart = currentKeyStart;
prevKeyLength = currentKeyLength;
}
}
}
// Convert the last series index into the series count.
seriesCount++;
Preconditions.checkState(seriesCount <= currentBatchSize);
}
} else {
// No selection vector: rows 0 .. currentBatchSize-1 are read directly.
if (bytesColVector.noNulls) {
// Same run detection as the selected/no-nulls case above, with direct row indices.
duplicateCounts[0] = 1;
byte[] prevKeyBytes = vectorBytesArrays[0];
int prevKeyStart = vectorStarts[0];
int prevKeyLength = vectorLengths[0];
serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
int currentKeyStart;
int currentKeyLength;
byte[] currentKeyBytes;
for (int index = 1; index < currentBatchSize; index++) {
currentKeyBytes = vectorBytesArrays[index];
currentKeyStart = vectorStarts[index];
currentKeyLength = vectorLengths[index];
if (StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
duplicateCounts[seriesCount]++;
} else {
duplicateCounts[++seriesCount] = 1;
serialize(seriesCount, currentKeyBytes, currentKeyStart, currentKeyLength);
prevKeyBytes = currentKeyBytes;
prevKeyStart = currentKeyStart;
prevKeyLength = currentKeyLength;
}
}
// The pre-increment converts the last series index into the series count.
Arrays.fill(seriesIsAllNull, 0, ++seriesCount, false);
nonNullKeyCount = seriesCount;
Preconditions.checkState(seriesCount <= currentBatchSize);
} else {
// Same run detection as the selected/with-nulls case above, with direct row indices.
boolean[] isNull = bytesColVector.isNull;
boolean prevKeyIsNull;
byte[] prevKeyBytes = null;
int prevKeyStart = 0;
int prevKeyLength = 0;
duplicateCounts[0] = 1;
if (isNull[0]) {
seriesIsAllNull[0] = true;
prevKeyIsNull = true;
} else {
seriesIsAllNull[0] = false;
prevKeyIsNull = false;
prevKeyBytes = vectorBytesArrays[0];
prevKeyStart = vectorStarts[0];
prevKeyLength = vectorLengths[0];
serialize(0, prevKeyBytes, prevKeyStart, prevKeyLength);
nonNullKeyCount = 1;
}
byte[] currentKeyBytes;
int currentKeyStart;
int currentKeyLength;
for (int index = 1; index < currentBatchSize; index++) {
if (isNull[index]) {
if (prevKeyIsNull) {
duplicateCounts[seriesCount]++;
} else {
duplicateCounts[++seriesCount] = 1;
seriesIsAllNull[seriesCount] = true;
prevKeyIsNull = true;
}
} else {
currentKeyBytes = vectorBytesArrays[index];
currentKeyStart = vectorStarts[index];
currentKeyLength = vectorLengths[index];
if (!prevKeyIsNull && StringExpr.equal(prevKeyBytes, prevKeyStart, prevKeyLength, currentKeyBytes, currentKeyStart, currentKeyLength)) {
duplicateCounts[seriesCount]++;
} else {
duplicateCounts[++seriesCount] = 1;
seriesIsAllNull[seriesCount] = false;
serialize(nonNullKeyCount++, currentKeyBytes, currentKeyStart, currentKeyLength);
prevKeyIsNull = false;
prevKeyBytes = currentKeyBytes;
prevKeyStart = currentKeyStart;
prevKeyLength = currentKeyLength;
}
}
}
// Convert the last series index into the series count.
seriesCount++;
Preconditions.checkState(seriesCount <= currentBatchSize);
}
}
}
// Finally.
computeSerializedHashCodes();
positionToFirst();
Preconditions.checkState(validate());
}
use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class VectorMapJoinCommonOperator method commonSetup.
/*
 * One-time setup shared by the native vectorized map join operators' processOp:
 * prepares the byte buffers of the string columns and creates the spill replay batch.
 */
protected void commonSetup(VectorizedRowBatch batch) throws HiveException {
  if (isLogDebugEnabled) {
    LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin...");
    displayBatchColumns(batch, "batch");
    displayBatchColumns(overflowBatch, "overflowBatch");
  }

  // Big table string columns: give the overflow batch's vectors room for string values.
  for (int bigTableColumn : bigTableByteColumnVectorColumns) {
    ((BytesColumnVector) overflowBatch.cols[bigTableColumn]).initBuffer();
  }

  // Small table string columns: initialize the buffers in both the big table batch
  // and the overflow batch.
  for (int smallTableColumn : smallTableByteColumnVectorColumns) {
    ((BytesColumnVector) batch.cols[smallTableColumn]).initBuffer();
    ((BytesColumnVector) overflowBatch.cols[smallTableColumn]).initBuffer();
  }

  // Scratch batch, shaped like the incoming batch, used to play back big table rows
  // that were spilled to disk for the Hybrid Grace hash partitioning.
  spillReplayBatch = VectorizedBatchUtil.makeLike(batch);
}
use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class VectorUDAFBloomFilterMerge method processValue.
/**
 * Merges the serialized BloomFilter stored in one row of the input column into the
 * aggregation buffer.
 *
 * @param myagg aggregation whose {@code bfBytes} array receives the merged bits
 * @param columnVector input column; must be a {@link BytesColumnVector}
 * @param i row of the column to merge
 */
void processValue(Aggregation myagg, ColumnVector columnVector, int i) {
  final BytesColumnVector serializedFilters = (BytesColumnVector) columnVector;

  // The row holds a serialized BloomFilter. mergeBloomFilterBytes() ORs the bytes
  // directly, which should be faster than deserializing and merging.
  final byte[] target = myagg.bfBytes;
  final int targetLength = target.length;
  final byte[] rowBytes = serializedFilters.vector[i];
  final int rowStart = serializedFilters.start[i];
  final int rowLength = serializedFilters.length[i];

  BloomFilter.mergeBloomFilterBytes(target, 0, targetLength, rowBytes, rowStart, rowLength);
}
use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class StringGroupConcatColCol method evaluate.
/**
 * Concatenates the values of two string columns row by row and stores the result in the
 * output column.
 *
 * <p>An output row is null whenever either input is null on that row. When both inputs are
 * repeating, or when a repeating input is null, the output is itself marked repeating and
 * only entry 0 is written.
 *
 * <p>Null handling is driven by one decision per input instead of a separate copy of the
 * loops per flag combination:
 * <ul>
 *   <li>a repeating input is judged only by its entry 0; its other {@code isNull} entries
 *       are never read;</li>
 *   <li>a non-repeating input is always judged by the row being processed;</li>
 *   <li>{@code noNulls == false} means "may contain nulls", so a repeating input is treated
 *       as null only if {@code isNull[0]} is actually set;</li>
 *   <li>whenever the output is flagged as possibly containing nulls, the {@code isNull}
 *       entry of every processed row is written, so flags left over from an earlier use of
 *       the output vector cannot leak into the result.</li>
 * </ul>
 *
 * @param batch the batch to process; its output column is updated in place
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  BytesColumnVector inV1 = (BytesColumnVector) batch.cols[colNum1];
  BytesColumnVector inV2 = (BytesColumnVector) batch.cols[colNum2];
  BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
  int[] sel = batch.selected;
  int n = batch.size;
  byte[][] vector1 = inV1.vector;
  byte[][] vector2 = inV2.vector;
  int[] len1 = inV1.length;
  int[] len2 = inV2.length;
  int[] start1 = inV1.start;
  int[] start2 = inV2.start;

  // return immediately if batch is empty
  if (n == 0) {
    return;
  }

  // prepare output buffer to accept results
  outV.initBuffer();

  // Default; switched to true below only when the whole output collapses into entry 0.
  outV.isRepeating = false;

  final boolean repeating1 = inV1.isRepeating;
  final boolean repeating2 = inV2.isRepeating;
  final boolean mayBeNull1 = !inV1.noNulls;
  final boolean mayBeNull2 = !inV2.noNulls;

  // A repeating input that is null makes every output row null: emit a repeating null.
  if ((mayBeNull1 && repeating1 && inV1.isNull[0])
      || (mayBeNull2 && repeating2 && inV2.isNull[0])) {
    outV.noNulls = false;
    outV.isNull[0] = true;
    outV.isRepeating = true;
    // return as no further processing is needed
    return;
  }

  // The output can only be declared null-free when both inputs are.
  final boolean anyMayBeNull = mayBeNull1 || mayBeNull2;
  outV.noNulls = !anyMayBeNull;

  if (repeating1 && repeating2) {
    // All must be selected otherwise size would be zero. Both values are known to be
    // non-null here, so a single concatenation describes the whole batch.
    outV.isNull[0] = false;
    outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]);
    outV.isRepeating = true;
    return;
  }

  // From here on a repeating input is known to be non-null, so only a non-repeating
  // input that may contain nulls can contribute per-row nulls.
  final boolean rowNulls1 = mayBeNull1 && !repeating1;
  final boolean rowNulls2 = mayBeNull2 && !repeating2;

  // True when the output is flagged as possibly null but no input supplies per-row null
  // flags (the only nullable input is a repeating non-null value). The output flags are
  // then cleared in the row loop instead of being copied from an input.
  final boolean clearNullFlags = anyMayBeNull && !rowNulls1 && !rowNulls2;

  if (rowNulls1 && rowNulls2) {
    propagateNullsCombine(batch.selectedInUse, n, sel, inV1, inV2, outV);
  } else if (rowNulls1) {
    propagateNulls(batch.selectedInUse, n, sel, inV1, outV);
  } else if (rowNulls2) {
    propagateNulls(batch.selectedInUse, n, sel, inV2, outV);
  }

  // perform data operation
  final boolean selectedInUse = batch.selectedInUse;
  for (int j = 0; j != n; j++) {
    final int i = selectedInUse ? sel[j] : j;

    // Skip rows that are null in a non-repeating input; their value is irrelevant.
    if ((rowNulls1 && inV1.isNull[i]) || (rowNulls2 && inV2.isNull[i])) {
      continue;
    }
    if (clearNullFlags) {
      outV.isNull[i] = false;
    }

    // A repeating input always contributes its entry 0.
    final int i1 = repeating1 ? 0 : i;
    final int i2 = repeating2 ? 0 : i;
    outV.setConcat(i, vector1[i1], start1[i1], len1[i1], vector2[i2], start2[i2], len2[i2]);
  }
}
Aggregations