Search in sources :

Example 1 with DateColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DateColumnVector in project hive by apache.

From the class VectorizedPrimitiveColumnReader, method decodeDictionaryIds.

/**
 * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
 *
 * <p>For each row in {@code [rowId, rowId + num)} the dictionary index stored in
 * {@code dictionaryIds.vector[i]} is resolved through {@code dictionary} and the decoded
 * value is written into the matching slot of {@code column}. Null flags from
 * {@code dictionaryIds} are copied onto {@code column} up front.
 *
 * @param rowId         first row to decode (inclusive)
 * @param num           number of consecutive rows to decode
 * @param column        destination vector; its concrete subclass must match
 *                      {@code columnType} (e.g. LongColumnVector for INT)
 * @param columnType    Hive type of the destination column; must be a PrimitiveTypeInfo
 * @param dictionaryIds per-row dictionary indices plus their null flags
 * @throws UnsupportedOperationException for primitive categories this reader does not
 *         handle (e.g. INTERVAL_DAY_TIME)
 */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, TypeInfo columnType, LongColumnVector dictionaryIds) {
    // Propagate the per-row null flags from the dictionary-id vector to the output column.
    System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
    // Only narrow noNulls (true -> false); once a null has been seen the column must
    // keep reporting noNulls == false.
    if (column.noNulls) {
        column.noNulls = dictionaryIds.noNulls;
    }
    // The column is only repeating if it already was AND the ids are repeating too.
    column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
    PrimitiveTypeInfo primitiveColumnType = (PrimitiveTypeInfo) columnType;
    switch(primitiveColumnType.getPrimitiveCategory()) {
        case INT:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.readInteger((int) dictionaryIds.vector[i]);
                // If the dictionary flags the last read as invalid, null the row and
                // zero the slot so stale data cannot leak out.
                if (!dictionary.isValid()) {
                    setNullValue(column, i);
                    ((LongColumnVector) column).vector[i] = 0;
                }
            }
            break;
        case BYTE:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.readTinyInt((int) dictionaryIds.vector[i]);
                if (!dictionary.isValid()) {
                    setNullValue(column, i);
                    ((LongColumnVector) column).vector[i] = 0;
                }
            }
            break;
        case SHORT:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.readSmallInt((int) dictionaryIds.vector[i]);
                if (!dictionary.isValid()) {
                    setNullValue(column, i);
                    ((LongColumnVector) column).vector[i] = 0;
                }
            }
            break;
        case DATE:
            DateColumnVector dc = (DateColumnVector) column;
            // Output dates are normalized to the proleptic Gregorian calendar unless
            // skipProlepticConversion says the stored values already are proleptic.
            dc.setUsingProlepticCalendar(true);
            for (int i = rowId; i < rowId + num; ++i) {
                dc.vector[i] = skipProlepticConversion ? dictionary.readLong((int) dictionaryIds.vector[i]) : CalendarUtils.convertDateToProleptic((int) dictionary.readLong((int) dictionaryIds.vector[i]));
                if (!dictionary.isValid()) {
                    setNullValue(column, i);
                    dc.vector[i] = 0;
                }
            }
            break;
        case INTERVAL_YEAR_MONTH:
        case LONG:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.readLong((int) dictionaryIds.vector[i]);
                if (!dictionary.isValid()) {
                    setNullValue(column, i);
                    ((LongColumnVector) column).vector[i] = 0;
                }
            }
            break;
        case BOOLEAN:
            // NOTE(review): no isValid() check here, unlike the numeric cases —
            // confirm booleans cannot produce invalid dictionary reads.
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.readBoolean((int) dictionaryIds.vector[i]) ? 1 : 0;
            }
            break;
        case DOUBLE:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.readDouble((int) dictionaryIds.vector[i]);
                if (!dictionary.isValid()) {
                    setNullValue(column, i);
                    ((DoubleColumnVector) column).vector[i] = 0;
                }
            }
            break;
        case BINARY:
            for (int i = rowId; i < rowId + num; ++i) {
                ((BytesColumnVector) column).setVal(i, dictionary.readBytes((int) dictionaryIds.vector[i]));
            }
            break;
        case STRING:
            for (int i = rowId; i < rowId + num; ++i) {
                ((BytesColumnVector) column).setVal(i, dictionary.readString((int) dictionaryIds.vector[i]));
            }
            break;
        case VARCHAR:
            for (int i = rowId; i < rowId + num; ++i) {
                ((BytesColumnVector) column).setVal(i, dictionary.readVarchar((int) dictionaryIds.vector[i]));
            }
            break;
        case CHAR:
            for (int i = rowId; i < rowId + num; ++i) {
                ((BytesColumnVector) column).setVal(i, dictionary.readChar((int) dictionaryIds.vector[i]));
            }
            break;
        case FLOAT:
            // Floats are widened into the double-backed vector.
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.readFloat((int) dictionaryIds.vector[i]);
                if (!dictionary.isValid()) {
                    setNullValue(column, i);
                    ((DoubleColumnVector) column).vector[i] = 0;
                }
            }
            break;
        case DECIMAL:
            // Pull precision/scale from the Parquet logical-type annotation, if present,
            // before decoding the raw unscaled bytes.
            DecimalLogicalTypeAnnotation decimalLogicalType = null;
            if (type.getLogicalTypeAnnotation() instanceof DecimalLogicalTypeAnnotation) {
                decimalLogicalType = (DecimalLogicalTypeAnnotation) type.getLogicalTypeAnnotation();
            }
            DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
            byte[] decimalData = null;
            fillDecimalPrecisionScale(decimalLogicalType, decimalColumnVector);
            for (int i = rowId; i < rowId + num; ++i) {
                decimalData = dictionary.readDecimal((int) dictionaryIds.vector[i]);
                if (dictionary.isValid()) {
                    decimalColumnVector.vector[i].set(decimalData, decimalColumnVector.scale);
                } else {
                    setNullValue(column, i);
                }
            }
            break;
        case TIMESTAMP:
            TimestampColumnVector tsc = (TimestampColumnVector) column;
            tsc.setUsingProlepticCalendar(true);
            // NOTE(review): no isValid() handling for timestamps — confirm readTimestamp
            // cannot return an invalid entry.
            for (int i = rowId; i < rowId + num; ++i) {
                tsc.set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]).toSqlTimestamp());
            }
            break;
        case INTERVAL_DAY_TIME:
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DecimalLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) DateColumnVector(org.apache.hadoop.hive.ql.exec.vector.DateColumnVector)

Example 2 with DateColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DateColumnVector in project hive by apache.

From the class TestVectorGenericDateExpressions, method testDateDiffColScalarWithTz.

@Test
public void testDateDiffColScalarWithTz() throws HiveException {
    // Run under a non-UTC default zone to catch timezone-sensitive date math.
    final TimeZone savedDefaultTz = TimeZone.getDefault();
    try {
        TimeZone.setDefault(TimeZone.getTimeZone("GMT+8"));
        // Second argument to datediff(): a scalar date string.
        byte[] scalarDate = "2021-07-01".getBytes(utf8);
        // First argument to datediff(): a one-row date column.
        DateColumnVector dateCol = new DateColumnVector(1);
        dateCol.fill(LocalDate.parse("2021-07-06").toEpochDay());
        VectorExpression expr = new VectorUDFDateDiffColScalar(0, scalarDate, 1);
        expr.setInputTypeInfos(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo);
        expr.transientInit(hiveConf);
        // Batch layout: col 0 = input dates, col 1 = datediff() output.
        VectorizedRowBatch rowBatch = new VectorizedRowBatch(2, 1);
        LongColumnVector result = new LongColumnVector(1);
        rowBatch.cols[0] = dateCol;
        rowBatch.cols[1] = result;
        expr.evaluate(rowBatch);
        // datediff("2021-07-06", "2021-07-01") = 5 days.
        Assert.assertEquals(5, result.vector[0]);
    } finally {
        // Always restore the JVM-wide default zone for subsequent tests.
        TimeZone.setDefault(savedDefaultTz);
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) TimeZone(java.util.TimeZone) DateColumnVector(org.apache.hadoop.hive.ql.exec.vector.DateColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Example 3 with DateColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DateColumnVector in project hive by apache.

From the class TestVectorGenericDateExpressions, method testDateDiffScalarColWithTz.

@Test
public void testDateDiffScalarColWithTz() throws HiveException {
    // Run under a non-UTC default zone to catch timezone-sensitive date math.
    final TimeZone savedDefaultTz = TimeZone.getDefault();
    try {
        TimeZone.setDefault(TimeZone.getTimeZone("GMT+8"));
        // First argument to datediff(): a scalar date string.
        byte[] scalarDate = "2021-07-01".getBytes(utf8);
        // Second argument to datediff(): a one-row date column.
        DateColumnVector dateCol = new DateColumnVector(1);
        dateCol.fill(LocalDate.parse("2021-07-06").toEpochDay());
        VectorExpression expr = new VectorUDFDateDiffScalarCol(scalarDate, 0, 1);
        expr.setInputTypeInfos(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.dateTypeInfo);
        expr.transientInit(hiveConf);
        // Batch layout: col 0 = input dates, col 1 = datediff() output.
        VectorizedRowBatch rowBatch = new VectorizedRowBatch(2, 1);
        LongColumnVector result = new LongColumnVector(1);
        rowBatch.cols[0] = dateCol;
        rowBatch.cols[1] = result;
        expr.evaluate(rowBatch);
        // datediff("2021-07-01", "2021-07-06") = -5 days.
        Assert.assertEquals(-5, result.vector[0]);
    } finally {
        // Always restore the JVM-wide default zone for subsequent tests.
        TimeZone.setDefault(savedDefaultTz);
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) TimeZone(java.util.TimeZone) DateColumnVector(org.apache.hadoop.hive.ql.exec.vector.DateColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Example 4 with DateColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DateColumnVector in project hive by apache.

From the class RecordReaderImpl, method nextDate.

/**
 * Extracts the date at {@code row} from {@code vector} as a {@link DateWritableV2},
 * reusing {@code previous} as the output object when it is exactly that class.
 *
 * @param vector   source column; must be a DateColumnVector for non-null rows
 * @param row      row index; ignored (treated as 0) when the vector is repeating
 * @param previous candidate writable to reuse, or null
 * @return the populated writable, or null when the row is null
 */
static DateWritableV2 nextDate(ColumnVector vector, int row, Object previous) {
    // A repeating vector keeps its single value in slot 0.
    if (vector.isRepeating) {
        row = 0;
    }
    if (!vector.noNulls && vector.isNull[row]) {
        return null;
    }
    // Reuse only an exact DateWritableV2 (getClass comparison, not instanceof),
    // matching the original reuse semantics.
    final DateWritableV2 result = (previous != null && previous.getClass() == DateWritableV2.class)
        ? (DateWritableV2) previous
        : new DateWritableV2();
    result.set((int) ((DateColumnVector) vector).vector[row]);
    return result;
}
Also used : DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) DateColumnVector(org.apache.hadoop.hive.ql.exec.vector.DateColumnVector)

Example 5 with DateColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DateColumnVector in project hive by apache.

From the class VectorBatchGenerator, method assignColumnVectors.

/**
 * Allocates the appropriate {@link ColumnVector} subclass for column {@code columnNum}
 * of {@code batch}, based on the first generate type of {@code columnGroup}.
 *
 * @param batch       batch whose {@code cols[columnNum]} slot is populated
 * @param columnNum   index of the column slot to fill
 * @param columnGroup describes the column's type; only the first generate type is used
 * @throws RuntimeException for complex categories (LIST, MAP, STRUCT, UNION) or any
 *         other category without a vector mapping
 */
public void assignColumnVectors(VectorizedRowBatch batch, int columnNum, VectorColumnGroupGenerator columnGroup) {
    // UNDONE: Multiple types...
    GenerateType[] generateTypes = columnGroup.generateTypes();
    GenerateType generateType = generateTypes[0];
    ColumnVector colVector;
    switch(generateType.getCategory()) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            // All integral-like categories share the long-backed vector.
            colVector = new LongColumnVector();
            break;
        case DATE:
            colVector = new DateColumnVector();
            break;
        case FLOAT:
        case DOUBLE:
            colVector = new DoubleColumnVector();
            break;
        case STRING:
        case CHAR:
        case VARCHAR:
        case BINARY:
            colVector = new BytesColumnVector();
            break;
        case TIMESTAMP:
            colVector = new TimestampColumnVector();
            break;
        case DECIMAL:
            // Maximum Hive decimal precision (38) with scale 18.
            colVector = new DecimalColumnVector(38, 18);
            break;
        case LIST:
        case MAP:
        case STRUCT:
        case UNION:
        default:
            // Fixed typo in the message: "catagory" -> "category".
            throw new RuntimeException("Unsupported category " + generateType.getCategory());
    }
    colVector.init();
    batch.cols[columnNum] = colVector;
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) DateColumnVector(org.apache.hadoop.hive.ql.exec.vector.DateColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) DateColumnVector(org.apache.hadoop.hive.ql.exec.vector.DateColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Aggregations

DateColumnVector (org.apache.hadoop.hive.ql.exec.vector.DateColumnVector)5 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)4 TimeZone (java.util.TimeZone)2 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)2 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)2 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)2 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)2 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)2 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)2 Test (org.junit.Test)2 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)1 DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2)1 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)1 DecimalLogicalTypeAnnotation (org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation)1