Example 81 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

From the class ColumnVectorGenUtil, the method generateTimestampColumnVector:

public static TimestampColumnVector generateTimestampColumnVector(boolean nulls, boolean repeating, int size, Random rand, Timestamp[] timestampValues) {
    TimestampColumnVector tcv = new TimestampColumnVector(size);
    tcv.noNulls = !nulls;
    tcv.isRepeating = repeating;
    Timestamp repeatingTimestamp = RandomTypeUtil.getRandTimestamp(rand);
    int nullFrequency = generateNullFrequency(rand);
    for (int i = 0; i < size; i++) {
        if (nulls && (repeating || i % nullFrequency == 0)) {
            tcv.isNull[i] = true;
            tcv.setNullValue(i);
            timestampValues[i] = null;
        } else {
            tcv.isNull[i] = false;
            if (!repeating) {
                Timestamp randomTimestamp = RandomTypeUtil.getRandTimestamp(rand);
                tcv.set(i, randomTimestamp.toSqlTimestamp());
                timestampValues[i] = randomTimestamp;
            } else {
                tcv.set(i, repeatingTimestamp.toSqlTimestamp());
                timestampValues[i] = repeatingTimestamp;
            }
        }
    }
    return tcv;
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) Timestamp(org.apache.hadoop.hive.common.type.Timestamp)
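
For reference, a minimal self-contained sketch of the TimestampColumnVector calls the helper above relies on (the vector size and timestamp literal here are invented for illustration):

public static void timestampColumnVectorDemo() {
    // Capacity of 4 rows; noNulls = false signals that isNull[] must be consulted.
    TimestampColumnVector tcv = new TimestampColumnVector(4);
    tcv.noNulls = false;
    // set(...) splits a java.sql.Timestamp into the time[] (epoch millis) and nanos[] arrays.
    tcv.set(0, java.sql.Timestamp.valueOf("2020-01-01 12:34:56.789"));
    // A null entry is flagged in isNull[] and given a well-defined placeholder value.
    tcv.isNull[1] = true;
    tcv.setNullValue(1);
    // asScratchTimestamp(...) reassembles time[i] and nanos[i] into a reusable scratch Timestamp.
    java.sql.Timestamp roundTrip = tcv.asScratchTimestamp(0);
    assert roundTrip.getNanos() == tcv.nanos[0];
}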

Example 82 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

From the class VectorColumnGroupGenerator, the method populateBatchColumn:

private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnIndex, int size) {
    int columnNum = columnNums[logicalColumnIndex];
    ColumnVector colVector = batch.cols[columnNum];
    GenerateType generateType = generateTypes[logicalColumnIndex];
    GenerateCategory category = generateType.getCategory();
    boolean allowNulls = generateType.getAllowNulls();
    boolean[] isNull = isNullArrays[logicalColumnIndex];
    if (allowNulls) {
        for (int i = 0; i < size; i++) {
            if (isNull[i]) {
                colVector.isNull[i] = true;
                colVector.noNulls = false;
            }
        }
    }
    Object array = arrays[logicalColumnIndex];
    switch(category) {
        case BOOLEAN:
            {
                boolean[] booleanArray = ((boolean[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    if (isNull[i]) {
                        vector[i] = 0;
                    } else {
                        vector[i] = (booleanArray[i] ? 1 : 0);
                    }
                }
            }
            break;
        case BYTE:
            {
                byte[] byteArray = ((byte[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    if (isNull[i]) {
                        vector[i] = 0;
                    } else {
                        vector[i] = byteArray[i];
                    }
                }
            }
            break;
        case SHORT:
            {
                short[] shortArray = ((short[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    if (isNull[i]) {
                        vector[i] = 0;
                    } else {
                        vector[i] = shortArray[i];
                    }
                }
            }
            break;
        case INT:
            {
                int[] intArray = ((int[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    if (isNull[i]) {
                        vector[i] = 0;
                    } else {
                        vector[i] = intArray[i];
                    }
                }
            }
            break;
        case LONG:
            {
                long[] longArray = ((long[]) array);
                long[] vector = ((LongColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    if (isNull[i]) {
                        vector[i] = 0;
                    } else {
                        vector[i] = longArray[i];
                    }
                }
            }
            break;
        case FLOAT:
            {
                float[] floatArray = ((float[]) array);
                double[] vector = ((DoubleColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    if (isNull[i]) {
                        vector[i] = 0;
                    } else {
                        vector[i] = floatArray[i];
                    }
                }
            }
            break;
        case DOUBLE:
            {
                double[] doubleArray = ((double[]) array);
                double[] vector = ((DoubleColumnVector) colVector).vector;
                for (int i = 0; i < size; i++) {
                    if (isNull[i]) {
                        vector[i] = 0;
                    } else {
                        vector[i] = doubleArray[i];
                    }
                }
            }
            break;
        case STRING:
            {
                String[] stringArray = ((String[]) array);
                BytesColumnVector bytesColVec = ((BytesColumnVector) colVector);
                for (int i = 0; i < size; i++) {
                    if (!isNull[i]) {
                        byte[] bytes = stringArray[i].getBytes();
                        bytesColVec.setVal(i, bytes);
                    }
                }
            }
            break;
        case BINARY:
            {
                byte[][] byteArrayArray = ((byte[][]) array);
                BytesColumnVector bytesColVec = ((BytesColumnVector) colVector);
                for (int i = 0; i < size; i++) {
                    if (!isNull[i]) {
                        byte[] bytes = byteArrayArray[i];
                        bytesColVec.setVal(i, bytes);
                    }
                }
            }
            break;
        case DATE:
            {
                Date[] dateArray = ((Date[]) array);
                LongColumnVector longColVec = ((LongColumnVector) colVector);
                for (int i = 0; i < size; i++) {
                    if (!isNull[i]) {
                        Date date = dateArray[i];
                        longColVec.vector[i] = DateWritableV2.dateToDays(date);
                    }
                }
            }
            break;
        case TIMESTAMP:
            {
                Timestamp[] timestampArray = ((Timestamp[]) array);
                TimestampColumnVector timestampColVec = ((TimestampColumnVector) colVector);
                for (int i = 0; i < size; i++) {
                    if (!isNull[i]) {
                        Timestamp timestamp = timestampArray[i];
                        timestampColVec.set(i, timestamp);
                    }
                }
            }
            break;
        case CHAR:
            {
                HiveChar[] hiveCharArray = ((HiveChar[]) array);
                BytesColumnVector bytesColVec = ((BytesColumnVector) colVector);
                for (int i = 0; i < size; i++) {
                    if (!isNull[i]) {
                        byte[] bytes = hiveCharArray[i].getValue().getBytes();
                        bytesColVec.setVal(i, bytes);
                    }
                }
            }
            break;
        case VARCHAR:
            {
                HiveVarchar[] hiveVarcharArray = ((HiveVarchar[]) array);
                BytesColumnVector bytesColVec = ((BytesColumnVector) colVector);
                for (int i = 0; i < size; i++) {
                    if (!isNull[i]) {
                        byte[] bytes = hiveVarcharArray[i].getValue().getBytes();
                        bytesColVec.setVal(i, bytes);
                    }
                }
            }
            break;
        case DECIMAL:
            {
                HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array);
                DecimalColumnVector decimalColVec = ((DecimalColumnVector) colVector);
                for (int i = 0; i < size; i++) {
                    if (!isNull[i]) {
                        HiveDecimalWritable decWritable = hiveDecimalWritableArray[i];
                        decimalColVec.set(i, decWritable);
                    }
                }
            }
            break;
        case LIST:
        case MAP:
        case STRUCT:
        case UNION:
        default:
            throw new RuntimeException("Unexpected generate category " + category);
    }
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) Timestamp(java.sql.Timestamp) Date(org.apache.hadoop.hive.common.type.Date) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) GenerateType(org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType) GenerateCategory(org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory)
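
Condensed for reference, the TIMESTAMP branch above reduces to the following pattern when populating a single-column batch; this is a hedged sketch with an invented method name, not part of the generator itself:

private static VectorizedRowBatch timestampBatchSketch(java.sql.Timestamp[] values, boolean[] isNull) {
    int size = values.length;
    // One column, sized to the input; cols[0] must be supplied by the caller.
    VectorizedRowBatch batch = new VectorizedRowBatch(1, size);
    TimestampColumnVector col = new TimestampColumnVector(size);
    batch.cols[0] = col;
    for (int i = 0; i < size; i++) {
        if (isNull[i]) {
            col.isNull[i] = true;
            col.noNulls = false;
        } else {
            col.set(i, values[i]);
        }
    }
    batch.size = size;
    return batch;
}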

Example 83 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project flink by apache.

From the class OrcBulkRowDataWriterTest, the method readMap:

/**
 * Reads a MapColumnVector with the schema {@literal
 * map<string,struct<_col3_col0:string,_col3_col1:timestamp>>}.
 */
private static MapData readMap(MapColumnVector mapVector, int row) {
    int offset = (int) mapVector.offsets[row];
    StringData keyData = readStringData((BytesColumnVector) mapVector.keys, offset);
    GenericRowData valueData = new GenericRowData(2);
    StructColumnVector structVector = (StructColumnVector) mapVector.values;
    BytesColumnVector bytesVector = (BytesColumnVector) structVector.fields[0];
    TimestampColumnVector timestampVector = (TimestampColumnVector) structVector.fields[1];
    StringData strValueData = readStringData(bytesVector, offset);
    TimestampData timestampData = readTimestamp(timestampVector, offset);
    valueData.setField(0, strValueData);
    valueData.setField(1, timestampData);
    Map<StringData, RowData> mapDataMap = new HashMap<>();
    mapDataMap.put(keyData, valueData);
    return new GenericMapData(mapDataMap);
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) GenericMapData(org.apache.flink.table.data.GenericMapData) TimestampData(org.apache.flink.table.data.TimestampData) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) StructColumnVector(org.apache.hadoop.hive.ql.exec.vector.StructColumnVector) HashMap(java.util.HashMap) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) BinaryStringData(org.apache.flink.table.data.binary.BinaryStringData) StringData(org.apache.flink.table.data.StringData)
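
The readStringData and readTimestamp helpers referenced above are not part of this excerpt; plausible implementations, assuming Flink's StringData and TimestampData factory methods, would be:

private static StringData readStringData(BytesColumnVector vector, int row) {
    // Copy exactly the [start, start + length) slice that backs this row.
    byte[] bytes = java.util.Arrays.copyOfRange(
        vector.vector[row], vector.start[row], vector.start[row] + vector.length[row]);
    return StringData.fromBytes(bytes);
}

private static TimestampData readTimestamp(TimestampColumnVector vector, int row) {
    // asScratchTimestamp materializes time[row]/nanos[row] as a reusable java.sql.Timestamp.
    return TimestampData.fromTimestamp(vector.asScratchTimestamp(row));
}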

Example 84 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project flink by apache.

From the class OrcColumnarRowSplitReaderTest, the method prepareReadFileWithTypes:

protected void prepareReadFileWithTypes(String file, int rowSize) throws IOException {
    // NOTE: ORC files carry field name information, so the field names here must match the ORC schema.
    TypeDescription schema = TypeDescription.fromString("struct<" + "f0:float," + "f1:double," + "f2:timestamp," + "f3:tinyint," + "f4:smallint" + ">");
    org.apache.hadoop.fs.Path filePath = new org.apache.hadoop.fs.Path(file);
    Configuration conf = new Configuration();
    Writer writer = OrcFile.createWriter(filePath, OrcFile.writerOptions(conf).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch(rowSize);
    DoubleColumnVector col0 = (DoubleColumnVector) batch.cols[0];
    DoubleColumnVector col1 = (DoubleColumnVector) batch.cols[1];
    TimestampColumnVector col2 = (TimestampColumnVector) batch.cols[2];
    LongColumnVector col3 = (LongColumnVector) batch.cols[3];
    LongColumnVector col4 = (LongColumnVector) batch.cols[4];
    col0.noNulls = false;
    col1.noNulls = false;
    col2.noNulls = false;
    col3.noNulls = false;
    col4.noNulls = false;
    for (int i = 0; i < rowSize - 1; i++) {
        col0.vector[i] = i;
        col1.vector[i] = i;
        Timestamp timestamp = toTimestamp(i);
        col2.time[i] = timestamp.getTime();
        col2.nanos[i] = timestamp.getNanos();
        col3.vector[i] = i;
        col4.vector[i] = i;
    }
    col0.isNull[rowSize - 1] = true;
    col1.isNull[rowSize - 1] = true;
    col2.isNull[rowSize - 1] = true;
    col3.isNull[rowSize - 1] = true;
    col4.isNull[rowSize - 1] = true;
    batch.size = rowSize;
    writer.addRowBatch(batch);
    batch.reset();
    writer.close();
}
Also used : Path(org.apache.flink.core.fs.Path) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Configuration(org.apache.hadoop.conf.Configuration) Timestamp(java.sql.Timestamp) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TypeDescription(org.apache.orc.TypeDescription) Writer(org.apache.orc.Writer) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
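
The toTimestamp helper is likewise external to this excerpt; any mapping from a row index to a distinct java.sql.Timestamp works, for example this hypothetical stand-in:

private static Timestamp toTimestamp(int i) {
    // Hypothetical: i seconds past the epoch, plus an i-derived millisecond fraction as nanos.
    Timestamp timestamp = new Timestamp(i * 1000L);
    timestamp.setNanos((i % 1000) * 1000000);
    return timestamp;
}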

Example 85 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project alluxio by Alluxio.

From the class OrcRow, the method getColumn:

@Override
public Object getColumn(String column) {
    final Integer columnPosition = mColumnNamePosition.get(column);
    if (columnPosition == null) {
        throw new IllegalArgumentException("Invalid column name: " + column);
    }
    final ColumnVector col = mBatch.cols[columnPosition];
    if (col.isNull[mPosition]) {
        return null;
    }
    if (col instanceof TimestampColumnVector) {
        return ((TimestampColumnVector) col).asScratchTimestamp(mPosition).getTime();
    } else if (col instanceof VoidColumnVector) {
        return null;
    } else if (col instanceof DecimalColumnVector) {
        return ((DecimalColumnVector) col).vector[mPosition].getHiveDecimal();
    } else if (col instanceof LongColumnVector) {
        return ((LongColumnVector) col).vector[mPosition];
    } else if (col instanceof BytesColumnVector) {
        BytesColumnVector bcv = (BytesColumnVector) col;
        return Arrays.copyOfRange(bcv.vector[mPosition], bcv.start[mPosition], bcv.start[mPosition] + bcv.length[mPosition]);
    } else if (col instanceof DoubleColumnVector) {
        return ((DoubleColumnVector) col).vector[mPosition];
    }
    throw new UnsupportedOperationException("Unsupported column vector: " + col.getClass().getName());
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) VoidColumnVector(org.apache.hadoop.hive.ql.exec.vector.VoidColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)
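
Note that for TIMESTAMP columns, getColumn returns the epoch-millisecond long from asScratchTimestamp(...).getTime() rather than a Timestamp object. A caller converting it back might look like the following sketch (the column name "created_at" is hypothetical):

Object raw = orcRow.getColumn("created_at");
if (raw instanceof Long) {
    // Re-wrap the epoch millis; sub-millisecond precision from nanos[] is lost at this point.
    java.sql.Timestamp ts = new java.sql.Timestamp((Long) raw);
}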

Aggregations

TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) 85
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) 31
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) 26
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) 21
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) 18
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) 17
Timestamp (java.sql.Timestamp) 16
Test (org.junit.Test) 11
Random (java.util.Random) 10
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector) 9
TimestampWritableV2 (org.apache.hadoop.hive.serde2.io.TimestampWritableV2) 7
IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector) 6
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 5
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) 4
Timestamp (org.apache.hadoop.hive.common.type.Timestamp) 3
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal) 2
HiveIntervalDayTime (org.apache.hadoop.hive.common.type.HiveIntervalDayTime) 2
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable) 2
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable) 2
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable) 2