Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.
The class TestVectorUDFAdaptor, method getBatchStrDblLongWithStrOut.
private VectorizedRowBatch getBatchStrDblLongWithStrOut() {
  VectorizedRowBatch b = new VectorizedRowBatch(4);
  BytesColumnVector strCol = new BytesColumnVector();
  LongColumnVector longCol = new LongColumnVector();
  DoubleColumnVector dblCol = new DoubleColumnVector();
  BytesColumnVector outCol = new BytesColumnVector();
  b.cols[0] = strCol;
  b.cols[1] = longCol;
  b.cols[2] = dblCol;
  b.cols[3] = outCol;
  strCol.initBuffer();
  // blue and red are byte[] fields defined on the enclosing test class.
  strCol.setVal(0, blue, 0, blue.length);
  strCol.setVal(1, red, 0, red.length);
  longCol.vector[0] = 0;
  longCol.vector[1] = 1;
  dblCol.vector[0] = 0.0;
  dblCol.vector[1] = 1.0;
  // Pre-set one null entry for possible later use...
  longCol.isNull[1] = true;
  // ...but have no nulls initially (isNull[] is ignored while noNulls is true).
  longCol.noNulls = true;
  strCol.noNulls = true;
  dblCol.noNulls = true;
  outCol.initBuffer();
  b.size = 2;
  return b;
}
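For context, here is a minimal sketch of how a batch built this way can be read back. It is not part of the Hive test; the class name BatchPrinter is made up. It shows how a BytesColumnVector exposes each value as a (vector, start, length) slice next to the long and double columns.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Hypothetical helper, not part of TestVectorUDFAdaptor: prints the input columns
// of a batch laid out as [string, long, double, string-out].
public class BatchPrinter {
  public static void print(VectorizedRowBatch b) {
    BytesColumnVector strCol = (BytesColumnVector) b.cols[0];
    LongColumnVector longCol = (LongColumnVector) b.cols[1];
    DoubleColumnVector dblCol = (DoubleColumnVector) b.cols[2];
    for (int i = 0; i < b.size; i++) {
      // A BytesColumnVector value is the slice vector[i][start[i] .. start[i] + length[i]).
      String s = new String(strCol.vector[i], strCol.start[i], strCol.length[i]);
      System.out.println(s + ", " + longCol.vector[i] + ", " + dblCol.vector[i]);
    }
  }
}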
Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.
The class VectorPTFEvaluatorDoubleMin, method evaluateGroupBatch.
public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
  evaluateInputExpr(batch);
  // Determine minimum of all non-null double column values; maintain isGroupResultNull.
  // We do not filter when PTF is in reducer.
  Preconditions.checkState(!batch.selectedInUse);
  final int size = batch.size;
  if (size == 0) {
    return;
  }
  DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
  if (doubleColVector.isRepeating) {
    if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
      if (isGroupResultNull) {
        min = doubleColVector.vector[0];
        isGroupResultNull = false;
      } else {
        final double repeatedMin = doubleColVector.vector[0];
        if (repeatedMin < min) {
          min = repeatedMin;
        }
      }
    }
  } else if (doubleColVector.noNulls) {
    double[] vector = doubleColVector.vector;
    double varMin = vector[0];
    for (int i = 1; i < size; i++) {
      final double d = vector[i];
      if (d < varMin) {
        varMin = d;
      }
    }
    if (isGroupResultNull) {
      min = varMin;
      isGroupResultNull = false;
    } else if (varMin < min) {
      min = varMin;
    }
  } else {
    boolean[] batchIsNull = doubleColVector.isNull;
    int i = 0;
    while (batchIsNull[i]) {
      if (++i >= size) {
        return;
      }
    }
    double[] vector = doubleColVector.vector;
    double varMin = vector[i++];
    for (; i < size; i++) {
      if (!batchIsNull[i]) {
        final double d = vector[i];
        if (d < varMin) {
          varMin = d;
        }
      }
    }
    if (isGroupResultNull) {
      min = varMin;
      isGroupResultNull = false;
    } else if (varMin < min) {
      min = varMin;
    }
  }
}
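The three branches above (repeating value, no nulls, possible nulls) are the standard scan pattern over a DoubleColumnVector. The following standalone sketch, with a made-up class name and data, shows the same skip-leading-nulls-then-scan logic from the last branch outside the evaluator.

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;

// Standalone sketch with made-up data: the null-aware min scan applied to a bare column.
public class DoubleMinScan {
  public static void main(String[] args) {
    DoubleColumnVector col = new DoubleColumnVector(4);
    col.vector[0] = 5.0;
    col.isNull[1] = true;   // row 1 is null
    col.vector[2] = -2.5;
    col.vector[3] = 7.0;
    col.noNulls = false;    // isNull[] is only honored when noNulls is false

    final int size = 4;
    int i = 0;
    while (i < size && col.isNull[i]) {
      i++;                  // skip leading nulls
    }
    if (i == size) {
      System.out.println("all rows null");
      return;
    }
    double min = col.vector[i++];
    for (; i < size; i++) {
      if (!col.isNull[i] && col.vector[i] < min) {
        min = col.vector[i];
      }
    }
    System.out.println("min = " + min);   // -2.5
  }
}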
Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.
The class VectorPTFEvaluatorDoubleSum, method evaluateGroupBatch.
public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
  evaluateInputExpr(batch);
  // Sum all non-null double column values; maintain isGroupResultNull.
  // We do not filter when PTF is in reducer.
  Preconditions.checkState(!batch.selectedInUse);
  final int size = batch.size;
  if (size == 0) {
    return;
  }
  DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
  if (doubleColVector.isRepeating) {
    if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
      if (isGroupResultNull) {
        // First aggregation calculation for group.
        sum = doubleColVector.vector[0] * batch.size;
        isGroupResultNull = false;
      } else {
        sum += doubleColVector.vector[0] * batch.size;
      }
    }
  } else if (doubleColVector.noNulls) {
    double[] vector = doubleColVector.vector;
    double varSum = vector[0];
    for (int i = 1; i < size; i++) {
      varSum += vector[i];
    }
    if (isGroupResultNull) {
      // First aggregation calculation for group.
      sum = varSum;
      isGroupResultNull = false;
    } else {
      sum += varSum;
    }
  } else {
    boolean[] batchIsNull = doubleColVector.isNull;
    int i = 0;
    while (batchIsNull[i]) {
      if (++i >= size) {
        return;
      }
    }
    double[] vector = doubleColVector.vector;
    double varSum = vector[i++];
    for (; i < size; i++) {
      if (!batchIsNull[i]) {
        varSum += vector[i];
      }
    }
    if (isGroupResultNull) {
      // First aggregation calculation for group.
      sum = varSum;
      isGroupResultNull = false;
    } else {
      sum += varSum;
    }
  }
}
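The repeating branch relies on vector[0] standing for every logical row when isRepeating is true, so the whole batch contributes vector[0] * batch.size to the running sum without a loop. A small sketch (class name and values are made up) comparing the loop and the shortcut:

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;

// Sketch with made-up values: row-by-row summation and the multiply shortcut agree
// for a repeating column.
public class RepeatingSumDemo {
  public static void main(String[] args) {
    final int size = 1024;
    DoubleColumnVector col = new DoubleColumnVector(size);
    col.isRepeating = true;
    col.vector[0] = 2.5;    // every logical row reads vector[0]

    double looped = 0.0;
    for (int i = 0; i < size; i++) {
      looped += col.vector[0];
    }
    double shortcut = col.vector[0] * size;
    System.out.println(looped + " == " + shortcut);   // 2560.0 == 2560.0
  }
}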
Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.
The class VectorizedListColumnReader, method fillColumnVector.
private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory category, ListColumnVector lcv, List valueList, int elementNum) {
  int total = valueList.size();
  setChildrenInfo(lcv, total, elementNum);
  switch (category) {
    case BOOLEAN:
      lcv.child = new LongColumnVector(total);
      for (int i = 0; i < valueList.size(); i++) {
        ((LongColumnVector) lcv.child).vector[i] = ((List<Integer>) valueList).get(i);
      }
      break;
    case INT:
    case BYTE:
    case SHORT:
    case DATE:
    case INTERVAL_YEAR_MONTH:
    case LONG:
      lcv.child = new LongColumnVector(total);
      for (int i = 0; i < valueList.size(); i++) {
        ((LongColumnVector) lcv.child).vector[i] = ((List<Long>) valueList).get(i);
      }
      break;
    case DOUBLE:
      lcv.child = new DoubleColumnVector(total);
      for (int i = 0; i < valueList.size(); i++) {
        ((DoubleColumnVector) lcv.child).vector[i] = ((List<Double>) valueList).get(i);
      }
      break;
    case BINARY:
    case STRING:
    case CHAR:
    case VARCHAR:
      lcv.child = new BytesColumnVector(total);
      lcv.child.init();
      for (int i = 0; i < valueList.size(); i++) {
        byte[] src = ((List<byte[]>) valueList).get(i);
        ((BytesColumnVector) lcv.child).setRef(i, src, 0, src.length);
      }
      break;
    case FLOAT:
      lcv.child = new DoubleColumnVector(total);
      for (int i = 0; i < valueList.size(); i++) {
        ((DoubleColumnVector) lcv.child).vector[i] = ((List<Float>) valueList).get(i);
      }
      break;
    case DECIMAL:
      decimalTypeCheck(type);
      int precision = type.asPrimitiveType().getDecimalMetadata().getPrecision();
      int scale = type.asPrimitiveType().getDecimalMetadata().getScale();
      lcv.child = new DecimalColumnVector(total, precision, scale);
      for (int i = 0; i < valueList.size(); i++) {
        ((DecimalColumnVector) lcv.child).vector[i].set(((List<byte[]>) valueList).get(i), scale);
      }
      break;
    case INTERVAL_DAY_TIME:
    case TIMESTAMP:
    default:
      throw new RuntimeException("Unsupported type in the list: " + type);
  }
}
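The DOUBLE and FLOAT cases both produce a flat DoubleColumnVector child hanging off the ListColumnVector, with offsets and lengths mapping each row to a slice of that child. A hand-built sketch of that shape (class name and data are made up):

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;

// Sketch with made-up data: a two-row list<double> column. Row 0 holds [1.0, 2.0],
// row 1 holds [3.0]; offsets/lengths index into the flat child vector.
public class ListOfDoubleDemo {
  public static void main(String[] args) {
    DoubleColumnVector child = new DoubleColumnVector(3);
    child.vector[0] = 1.0;
    child.vector[1] = 2.0;
    child.vector[2] = 3.0;

    ListColumnVector lcv = new ListColumnVector(2, child);
    lcv.offsets[0] = 0;
    lcv.lengths[0] = 2;   // row 0 -> child[0..1]
    lcv.offsets[1] = 2;
    lcv.lengths[1] = 1;   // row 1 -> child[2]
    lcv.childCount = 3;

    for (int row = 0; row < 2; row++) {
      int start = (int) lcv.offsets[row];
      for (int j = 0; j < lcv.lengths[row]; j++) {
        System.out.println("row " + row + ": " + ((DoubleColumnVector) lcv.child).vector[start + j]);
      }
    }
  }
}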
Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.
The class VectorizedPrimitiveColumnReader, method decodeDictionaryIds.
/**
 * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
 */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, TypeInfo columnType, LongColumnVector dictionaryIds) {
  System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
  if (column.noNulls) {
    column.noNulls = dictionaryIds.noNulls;
  }
  column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
  PrimitiveTypeInfo primitiveColumnType = (PrimitiveTypeInfo) columnType;
  switch (primitiveColumnType.getPrimitiveCategory()) {
    case INT:
    case BYTE:
    case SHORT:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] = dictionary.readInteger((int) dictionaryIds.vector[i]);
        if (!(dictionary.isValid(((LongColumnVector) column).vector[i]))) {
          setNullValue(column, i);
          ((LongColumnVector) column).vector[i] = 0;
        }
      }
      break;
    case DATE:
    case INTERVAL_YEAR_MONTH:
    case LONG:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] = dictionary.readLong((int) dictionaryIds.vector[i]);
      }
      break;
    case BOOLEAN:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] = dictionary.readBoolean((int) dictionaryIds.vector[i]) ? 1 : 0;
      }
      break;
    case DOUBLE:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] = dictionary.readDouble((int) dictionaryIds.vector[i]);
      }
      break;
    case BINARY:
      for (int i = rowId; i < rowId + num; ++i) {
        ((BytesColumnVector) column).setVal(i, dictionary.readBytes((int) dictionaryIds.vector[i]));
      }
      break;
    case STRING:
      for (int i = rowId; i < rowId + num; ++i) {
        ((BytesColumnVector) column).setVal(i, dictionary.readString((int) dictionaryIds.vector[i]));
      }
      break;
    case VARCHAR:
      for (int i = rowId; i < rowId + num; ++i) {
        ((BytesColumnVector) column).setVal(i, dictionary.readVarchar((int) dictionaryIds.vector[i]));
      }
      break;
    case CHAR:
      for (int i = rowId; i < rowId + num; ++i) {
        ((BytesColumnVector) column).setVal(i, dictionary.readChar((int) dictionaryIds.vector[i]));
      }
      break;
    case FLOAT:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] = dictionary.readFloat((int) dictionaryIds.vector[i]);
      }
      break;
    case DECIMAL:
      decimalTypeCheck(type);
      DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
      decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
      decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
      for (int i = rowId; i < rowId + num; ++i) {
        decimalColumnVector.vector[i].set(dictionary.readDecimal((int) dictionaryIds.vector[i]), decimalColumnVector.scale);
      }
      break;
    case TIMESTAMP:
      for (int i = rowId; i < rowId + num; ++i) {
        ((TimestampColumnVector) column).set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]));
      }
      break;
    case INTERVAL_DAY_TIME:
    default:
      throw new UnsupportedOperationException("Unsupported type: " + type);
  }
}
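The per-type loops all follow one pattern: each dictionary id in dictionaryIds.vector is replaced by the dictionary entry it points at. The following standalone sketch shows the DOUBLE case with a plain double[] standing in for the Parquet dictionary (class name and data are made up).

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

// Sketch with made-up data: dictionary[id] plays the role of dictionary.readDouble(id).
public class DictionaryDecodeDemo {
  public static void main(String[] args) {
    double[] dictionary = { 0.5, 1.5, 2.5 };

    LongColumnVector ids = new LongColumnVector(4);   // the decoded dictionary ids
    ids.vector[0] = 2;
    ids.vector[1] = 0;
    ids.vector[2] = 1;
    ids.vector[3] = 2;

    DoubleColumnVector out = new DoubleColumnVector(4);
    for (int i = 0; i < 4; i++) {
      out.vector[i] = dictionary[(int) ids.vector[i]];
    }
    for (int i = 0; i < 4; i++) {
      System.out.println(out.vector[i]);   // 2.5, 0.5, 1.5, 2.5
    }
  }
}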