Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class CastStringToDate, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
  int[] sel = batch.selected;
  int n = batch.size;
  LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
  if (n == 0) {
    // Nothing to do
    return;
  }
  if (inV.noNulls) {
    outV.noNulls = true;
    if (inV.isRepeating) {
      outV.isRepeating = true;
      evaluate(outV, inV, 0);
    } else if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        evaluate(outV, inV, i);
      }
      outV.isRepeating = false;
    } else {
      for (int i = 0; i != n; i++) {
        evaluate(outV, inV, i);
      }
      outV.isRepeating = false;
    }
  } else {
    // Handle case with nulls. Don't do function if the value is null,
    // because the data may be undefined for a null value.
    outV.noNulls = false;
    if (inV.isRepeating) {
      outV.isRepeating = true;
      outV.isNull[0] = inV.isNull[0];
      if (!inV.isNull[0]) {
        evaluate(outV, inV, 0);
      }
    } else if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outV.isNull[i] = inV.isNull[i];
        if (!inV.isNull[i]) {
          evaluate(outV, inV, i);
        }
      }
      outV.isRepeating = false;
    } else {
      System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
      for (int i = 0; i != n; i++) {
        if (!inV.isNull[i]) {
          evaluate(outV, inV, i);
        }
      }
      outV.isRepeating = false;
    }
  }
}
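The batch-level method above only routes over the null/repeating/selected cases; the per-row conversion happens in an overloaded evaluate(outV, inV, i) helper that the snippet does not show. A minimal driver might look like the sketch below. It is a sketch under assumptions: the two-argument (inputColumn, outputColumn) constructor and the days-since-epoch encoding of the output LongColumnVector are inferred from how Hive's DATE vectors generally behave, not confirmed by this snippet.

// A minimal sketch, assuming CastStringToDate(inputColumn, outputColumn)
// and a days-since-epoch encoding in the output LongColumnVector.
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate;

public class CastStringToDateSketch {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    BytesColumnVector in = new BytesColumnVector();
    LongColumnVector out = new LongColumnVector();
    batch.cols[0] = in;
    batch.cols[1] = out;
    in.initBuffer();  // allocate the shared byte buffer before setVal
    byte[] d = "2017-03-15".getBytes(StandardCharsets.UTF_8);
    in.setVal(0, d, 0, d.length);  // row 0: a parseable date string
    in.isNull[1] = true;           // row 1: NULL input
    in.noNulls = false;
    batch.size = 2;
    CastStringToDate cast = new CastStringToDate(0, 1);  // assumed constructor
    cast.evaluate(batch);
    // The null-handling branch above copies isNull and skips the null row.
    System.out.println(out.vector[0] + " days since epoch; row 1 null: " + out.isNull[1]);
  }
}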
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class CastStringToIntervalYearMonth, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
  int[] sel = batch.selected;
  int n = batch.size;
  LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
  if (n == 0) {
    // Nothing to do
    return;
  }
  if (inV.noNulls) {
    outV.noNulls = true;
    if (inV.isRepeating) {
      outV.isRepeating = true;
      evaluate(outV, inV, 0);
    } else if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        evaluate(outV, inV, i);
      }
      outV.isRepeating = false;
    } else {
      for (int i = 0; i != n; i++) {
        evaluate(outV, inV, i);
      }
      outV.isRepeating = false;
    }
  } else {
    // Handle case with nulls. Don't do function if the value is null,
    // because the data may be undefined for a null value.
    outV.noNulls = false;
    if (inV.isRepeating) {
      outV.isRepeating = true;
      outV.isNull[0] = inV.isNull[0];
      if (!inV.isNull[0]) {
        evaluate(outV, inV, 0);
      }
    } else if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outV.isNull[i] = inV.isNull[i];
        if (!inV.isNull[i]) {
          evaluate(outV, inV, i);
        }
      }
      outV.isRepeating = false;
    } else {
      System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
      for (int i = 0; i != n; i++) {
        if (!inV.isNull[i]) {
          evaluate(outV, inV, i);
        }
      }
      outV.isRepeating = false;
    }
  }
}
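The dispatch is identical to CastStringToDate; only the elided per-row helper differs. Based on the class name, a plausible shape for that helper (an illustrative reconstruction, not verbatim Hive source) parses the bytes as a HiveIntervalYearMonth and stores its total month count, nulling the row on a parse failure:

// Plausible per-row helper: parse the string, store total months, and null
// out the row if the input does not parse. HiveIntervalYearMonth is
// org.apache.hadoop.hive.common.type.HiveIntervalYearMonth.
private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
  try {
    String s = new String(inV.vector[i], inV.start[i], inV.length[i],
        java.nio.charset.StandardCharsets.UTF_8);
    HiveIntervalYearMonth interval = HiveIntervalYearMonth.valueOf(s);
    outV.vector[i] = interval.getTotalMonths();
  } catch (IllegalArgumentException e) {
    // Unparseable input becomes NULL rather than failing the whole batch.
    outV.noNulls = false;
    outV.isNull[i] = true;
  }
}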
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class AbstractFilterStringColLikeStringScalar, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (checker == null) {
    checker = createChecker(pattern);
  }
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
  int[] sel = batch.selected;
  boolean[] nullPos = inputColVector.isNull;
  int n = batch.size;
  byte[][] vector = inputColVector.vector;
  int[] length = inputColVector.length;
  int[] start = inputColVector.start;
  // Return immediately if batch is empty.
  if (n == 0) {
    return;
  }
  if (inputColVector.noNulls) {
    if (inputColVector.isRepeating) {
      // All must be selected, otherwise size would be zero. Repeating property will not change.
      if (!checker.check(vector[0], start[0], length[0])) {
        // Entire batch is filtered out.
        batch.size = 0;
      }
    } else if (batch.selectedInUse) {
      int newSize = 0;
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        if (checker.check(vector[i], start[i], length[i])) {
          sel[newSize++] = i;
        }
      }
      batch.size = newSize;
    } else {
      int newSize = 0;
      for (int i = 0; i != n; i++) {
        if (checker.check(vector[i], start[i], length[i])) {
          sel[newSize++] = i;
        }
      }
      if (newSize < n) {
        batch.size = newSize;
        batch.selectedInUse = true;
      }
    }
  } else {
    if (inputColVector.isRepeating) {
      // All must be selected, otherwise size would be zero. Repeating property will not change.
      if (!nullPos[0]) {
        if (!checker.check(vector[0], start[0], length[0])) {
          // Entire batch is filtered out.
          batch.size = 0;
        }
      } else {
        batch.size = 0;
      }
    } else if (batch.selectedInUse) {
      int newSize = 0;
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        if (!nullPos[i]) {
          if (checker.check(vector[i], start[i], length[i])) {
            sel[newSize++] = i;
          }
        }
      }
      // Change the selected vector.
      batch.size = newSize;
    } else {
      int newSize = 0;
      for (int i = 0; i != n; i++) {
        if (!nullPos[i]) {
          if (checker.check(vector[i], start[i], length[i])) {
            sel[newSize++] = i;
          }
        }
      }
      if (newSize < n) {
        batch.size = newSize;
        batch.selectedInUse = true;
      }
      /* If every row qualified (newSize == n), then we can ignore the sel vector to streamline
       * future operations. So selectedInUse will remain false.
       */
    }
  }
}
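All of the check(vector[i], start[i], length[i]) calls above go through a Checker that createChecker builds once from the LIKE pattern, so the per-row work never re-examines the pattern. The interface shape is implied by those calls; the BeginChecker below is an illustrative sketch of how a prefix pattern like 'abc%' can be matched directly against the byte range, without materializing a String per row (Hive's concrete checker classes may differ in detail):

// The predicate shape implied by the check(bytes, start, len) calls above.
protected interface Checker {
  boolean check(byte[] byteS, int start, int len);
}

// Illustrative checker for prefix patterns such as "abc%".
static class BeginChecker implements Checker {
  private final byte[] prefix;

  BeginChecker(byte[] prefix) {
    this.prefix = prefix;
  }

  @Override
  public boolean check(byte[] byteS, int start, int len) {
    if (len < prefix.length) {
      return false;  // value is shorter than the required prefix
    }
    for (int i = 0; i < prefix.length; i++) {
      if (byteS[start + i] != prefix[i]) {
        return false;
      }
    }
    return true;
  }
}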
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class VectorMapJoinLeftSemiStringOperator, method process.
//---------------------------------------------------------------------------
// Process Single-Column String Left-Semi Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;
    alias = (byte) tag;
    if (needCommonSetup) {
      // Our one-time process method initialization.
      commonSetup(batch);
      /*
       * Initialize Single-Column String members for this specialized class.
       */
      singleJoinColumn = bigTableKeyColumnMap[0];
      needCommonSetup = false;
    }
    if (needHashTableSetup) {
      // Setup our hash table specialization. It will be the first time the process
      // method is called, or after a Hybrid Grace reload.
      /*
       * Get our Single-Column String hash set information for this specialized class.
       */
      hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable;
      needHashTableSetup = false;
    }
    batchCounter++;
    // For left semi joins, we may apply the filter(s) now.
    for (VectorExpression ve : bigTableFilterExpressions) {
      ve.evaluate(batch);
    }
    final int inputLogicalSize = batch.size;
    if (inputLogicalSize == 0) {
      if (isLogDebugEnabled) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
      }
      return;
    }
    // Perform any key expressions. Results will go into scratch columns.
    if (bigTableKeyExpressions != null) {
      for (VectorExpression ve : bigTableKeyExpressions) {
        ve.evaluate(batch);
      }
    }
    /*
     * Single-Column String specific declarations.
     */
    // The one join column for this specialized class.
    BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn];
    byte[][] vector = joinColVector.vector;
    int[] start = joinColVector.start;
    int[] length = joinColVector.length;
    /*
     * Single-Column String check for repeating.
     */
    boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
    if (allKeyInputColumnsRepeating) {
      /*
       * Repeating.
       */
      // All key input columns are repeating. Generate key once. Lookup once.
      // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
      /*
       * Single-Column String specific repeated lookup.
       */
      JoinUtil.JoinResult joinResult;
      if (!joinColVector.noNulls && joinColVector.isNull[0]) {
        joinResult = JoinUtil.JoinResult.NOMATCH;
      } else {
        byte[] keyBytes = vector[0];
        int keyStart = start[0];
        int keyLength = length[0];
        joinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[0]);
      }
      if (isLogDebugEnabled) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
      }
      finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
    } else {
      if (isLogDebugEnabled) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
      }
      // We remember any matching rows in allMatchs / allMatchCount; spilled rows are
      // tracked separately in spills / spillCount.
      int[] selected = batch.selected;
      boolean selectedInUse = batch.selectedInUse;
      int hashSetResultCount = 0;
      int allMatchCount = 0;
      int spillCount = 0;
      /*
       * Single-Column String specific variables.
       */
      int saveKeyBatchIndex = -1;
      // We optimize performance by only looking up the first key in a series of equal keys.
      boolean haveSaveKey = false;
      JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
      // Logical loop over the rows in the batch since the batch may have selected in use.
      for (int logical = 0; logical < inputLogicalSize; logical++) {
        int batchIndex = (selectedInUse ? selected[logical] : logical);
        /*
         * Single-Column String get key.
         */
        // Implicit -- use batchIndex.
        boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
        if (isNull || !haveSaveKey
            || !StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex],
                length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex],
                length[batchIndex])) {
          if (haveSaveKey) {
            // Move on with our counts.
            switch (saveJoinResult) {
            case MATCH:
              // We have extracted the existence from the hash set result, so we don't keep it.
              break;
            case SPILL:
              // We keep the hash set result for its spill information.
              hashSetResultCount++;
              break;
            case NOMATCH:
              break;
            }
          }
          if (isNull) {
            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            haveSaveKey = false;
          } else {
            // Regardless of our matching result, we keep that information to make multiple use
            // of it for a possible series of equal keys.
            haveSaveKey = true;
            /*
             * Single-Column String specific save key and lookup.
             */
            saveKeyBatchIndex = batchIndex;
            /*
             * Single-Column String specific lookup key.
             */
            byte[] keyBytes = vector[batchIndex];
            int keyStart = start[batchIndex];
            int keyLength = length[batchIndex];
            saveJoinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[hashSetResultCount]);
          }
          switch (saveJoinResult) {
          case MATCH:
            allMatchs[allMatchCount++] = batchIndex;
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashSetResultCount;
            spillCount++;
            break;
          case NOMATCH:
            break;
          }
        } else {
          // Series of equal keys: reuse the saved join result.
          switch (saveJoinResult) {
          case MATCH:
            allMatchs[allMatchCount++] = batchIndex;
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashSetResultCount;
            spillCount++;
            break;
          case NOMATCH:
            break;
          }
        }
      }
      if (haveSaveKey) {
        // Update our counts for the last key.
        switch (saveJoinResult) {
        case MATCH:
          // We have extracted the existence from the hash set result, so we don't keep it.
          break;
        case SPILL:
          // We keep the hash set result for its spill information.
          hashSetResultCount++;
          break;
        case NOMATCH:
          break;
        }
      }
      if (isLogDebugEnabled) {
        LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount)
            + " spills " + intArrayToRangesString(spills, spillCount)
            + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount)
            + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
      }
      finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
    }
    if (batch.size > 0) {
      // Forward any remaining selected rows.
      forwardBigTableBatch(batch);
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
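The heart of the non-repeating branch is the saved-key optimization: since batches frequently carry runs of equal keys (for example after a sort or a clustered scan), the hash set is probed only when the key changes, and the saved JoinResult is reused for the rest of the run. Stripped of Hive's spill and selected-vector bookkeeping, the pattern reduces to the standalone sketch below, with String keys standing in for the byte-range keys (illustrative names, not Hive API):

// Standalone sketch of the equal-key-series optimization: probe the set only
// when the key changes, and reuse the result across the run of equal keys.
static int semiJoinMatches(String[] keys, java.util.Set<String> hashSet, int[] matches) {
  int matchCount = 0;
  String saveKey = null;
  boolean saveResult = false;
  for (int i = 0; i < keys.length; i++) {
    String key = keys[i];
    if (key == null) {
      saveKey = null;  // a NULL key never matches and ends the current run
      continue;
    }
    if (saveKey == null || !saveKey.equals(key)) {
      saveKey = key;   // key changed: do the expensive probe exactly once
      saveResult = hashSet.contains(key);
    }
    if (saveResult) {
      matches[matchCount++] = i;  // reuse the saved result for equal keys
    }
  }
  return matchCount;
}

On a batch of keys like {a, a, a, b, b}, this performs two probes instead of five, which is the saving the saveKeyBatchIndex / haveSaveKey machinery above buys.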
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
The class VectorizedPrimitiveColumnReader, method decodeDictionaryIds.
/**
 * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
 */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, LongColumnVector dictionaryIds) {
  System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
  if (column.noNulls) {
    column.noNulls = dictionaryIds.noNulls;
  }
  column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
  switch (descriptor.getType()) {
  case INT32:
    for (int i = rowId; i < rowId + num; ++i) {
      ((LongColumnVector) column).vector[i] = dictionary.decodeToInt((int) dictionaryIds.vector[i]);
    }
    break;
  case INT64:
    for (int i = rowId; i < rowId + num; ++i) {
      ((LongColumnVector) column).vector[i] = dictionary.decodeToLong((int) dictionaryIds.vector[i]);
    }
    break;
  case FLOAT:
    for (int i = rowId; i < rowId + num; ++i) {
      ((DoubleColumnVector) column).vector[i] = dictionary.decodeToFloat((int) dictionaryIds.vector[i]);
    }
    break;
  case DOUBLE:
    for (int i = rowId; i < rowId + num; ++i) {
      ((DoubleColumnVector) column).vector[i] = dictionary.decodeToDouble((int) dictionaryIds.vector[i]);
    }
    break;
  case INT96:
    final Calendar calendar;
    if (Strings.isNullOrEmpty(this.conversionTimeZone)) {
      // Local time should be used if no timezone is specified.
      calendar = Calendar.getInstance();
    } else {
      calendar = Calendar.getInstance(TimeZone.getTimeZone(this.conversionTimeZone));
    }
    for (int i = rowId; i < rowId + num; ++i) {
      ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer();
      buf.order(ByteOrder.LITTLE_ENDIAN);
      long timeOfDayNanos = buf.getLong();
      int julianDay = buf.getInt();
      NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
      Timestamp ts = NanoTimeUtils.getTimestamp(nt, calendar);
      ((TimestampColumnVector) column).set(i, ts);
    }
    break;
  case BINARY:
  case FIXED_LEN_BYTE_ARRAY:
    if (column instanceof BytesColumnVector) {
      for (int i = rowId; i < rowId + num; ++i) {
        ((BytesColumnVector) column).setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe());
      }
    } else {
      DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
      decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
      decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
      for (int i = rowId; i < rowId + num; ++i) {
        decimalColumnVector.vector[i].set(dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe(), decimalColumnVector.scale);
      }
    }
    break;
  default:
    throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
  }
}
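Dictionary decoding is an indexed lookup per row: the page stores each distinct value once, and dictionaryIds holds small integer positions into it. The INT96 branch additionally unpacks Parquet's legacy timestamp layout, 8 bytes of nanos-of-day followed by a 4-byte Julian day, both little-endian. Below is a self-contained sketch of that unpacking with plain JDK types, ignoring the calendar/timezone adjustment NanoTimeUtils applies (2440588 is the Julian day number of 1970-01-01):

// Standalone sketch: convert a Parquet INT96 timestamp to epoch millis.
// Layout: 8 bytes time-of-day nanos, then 4 bytes Julian day, little-endian.
static long int96ToEpochMillis(byte[] int96) {
  java.nio.ByteBuffer buf = java.nio.ByteBuffer.wrap(int96)
      .order(java.nio.ByteOrder.LITTLE_ENDIAN);
  long timeOfDayNanos = buf.getLong();
  int julianDay = buf.getInt();
  long epochDays = julianDay - 2440588L;  // 2440588 = Julian day of 1970-01-01
  return epochDays * 86_400_000L + timeOfDayNanos / 1_000_000L;
}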