Search in sources :

Example 6 with ColumnarRowData

Use of org.apache.flink.table.data.columnar.ColumnarRowData in the Apache Flink project.

The following example comes from the class VectorizedColumnBatchTest, method testTyped.

// Verifies typed, per-column read access through ColumnarRowData over a
// VectorizedColumnBatch of 13 heterogeneously-typed column vectors
// (boolean, bytes/string, byte, double, float, int, long, short, three
// timestamp encodings, decimal, and array).
// NOTE(review): VECTOR_SIZE and ARRAY_SIZE are class constants not visible
// in this excerpt — presumably small positive ints; confirm in the class.
@Test
public void testTyped() throws IOException {
    // col0: BOOLEAN — alternating true/false (true on even indices).
    HeapBooleanVector col0 = new HeapBooleanVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col0.vector[i] = i % 2 == 0;
    }
    // col1: STRING/BYTES — UTF-8 bytes of the decimal string of i.
    HeapBytesVector col1 = new HeapBytesVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        byte[] bytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
        col1.appendBytes(i, bytes, 0, bytes.length);
    }
    // col2: TINYINT — i truncated to a byte.
    HeapByteVector col2 = new HeapByteVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col2.vector[i] = (byte) i;
    }
    // col3: DOUBLE.
    HeapDoubleVector col3 = new HeapDoubleVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col3.vector[i] = i;
    }
    // col4: FLOAT.
    HeapFloatVector col4 = new HeapFloatVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col4.vector[i] = i;
    }
    // col5: INT.
    HeapIntVector col5 = new HeapIntVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col5.vector[i] = i;
    }
    // col6: BIGINT.
    HeapLongVector col6 = new HeapLongVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col6.vector[i] = i;
    }
    // col7: SMALLINT — i truncated to a short.
    HeapShortVector col7 = new HeapShortVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col7.vector[i] = (short) i;
    }
    // The precision of Timestamp in parquet should be one of MILLIS, MICROS or NANOS.
    // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp
    // 
    // For MILLIS, the underlying INT64 holds milliseconds
    // For MICROS, the underlying INT64 holds microseconds
    // For NANOS, the underlying INT96 holds nanoOfDay(8 bytes) and julianDay(4 bytes)
    // col8: TIMESTAMP in MILLIS — vector8[i] is i milliseconds since epoch.
    long[] vector8 = new long[VECTOR_SIZE];
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector8[i] = i;
    }
    TimestampColumnVector col8 = new TimestampColumnVector() {

        @Override
        public boolean isNullAt(int i) {
            return false;
        }

        @Override
        public TimestampData getTimestamp(int i, int precision) {
            return TimestampData.fromEpochMillis(vector8[i]);
        }
    };
    // col9: TIMESTAMP in MICROS — vector9[i] holds i*1000 microseconds,
    // i.e. exactly i milliseconds since epoch.
    long[] vector9 = new long[VECTOR_SIZE];
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector9[i] = i * 1000;
    }
    TimestampColumnVector col9 = new TimestampColumnVector() {

        @Override
        public TimestampData getTimestamp(int i, int precision) {
            long microseconds = vector9[i];
            // Split micros into whole millis plus the sub-millisecond
            // remainder expressed in nanoseconds. (Note: the cast binds to
            // (microseconds % 1000) before the *1000 — fine here because
            // the remainder is < 1000.)
            return TimestampData.fromEpochMillis(microseconds / 1000, (int) (microseconds % 1000) * 1000);
        }

        @Override
        public boolean isNullAt(int i) {
            return false;
        }
    };
    // col10: TIMESTAMP in NANOS via parquet INT96 layout — each row is 12
    // bytes: big-endian nanoOfDay in bytes [0,8) and big-endian julianDay
    // in bytes [8,12), concatenated into one backing buffer.
    HeapBytesVector vector10 = new HeapBytesVector(VECTOR_SIZE);
    {
        // Fixed sub-second part for every row: 0.123456789 s.
        int nanosecond = 123456789;
        int start = 0;
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (int i = 0; i < VECTOR_SIZE; i++) {
            byte[] bytes = new byte[12];
            // i means second
            long l = i * 1000000000L + nanosecond;
            // Write nanoOfDay big-endian into bytes[0..7] (least significant
            // byte lands at index 7).
            for (int j = 0; j < 8; j++) {
                bytes[7 - j] = (byte) l;
                l >>>= 8;
            }
            // Epoch Julian
            // 2440588 is the Julian day number of 1970-01-01, so every row's
            // day component is the Unix epoch itself.
            int n = 2440588;
            // Write julianDay big-endian into bytes[8..11].
            for (int j = 0; j < 4; j++) {
                bytes[11 - j] = (byte) n;
                n >>>= 8;
            }
            // Record this row's slice (offset + fixed 12-byte length) in the
            // vector's start/length tables.
            vector10.start[i] = start;
            vector10.length[i] = 12;
            start += 12;
            out.write(bytes);
        }
        vector10.buffer = out.toByteArray();
    }
    TimestampColumnVector col10 = new TimestampColumnVector() {

        @Override
        public TimestampData getTimestamp(int colId, int precision) {
            // Decode the 12-byte INT96 written above: big-endian nanoOfDay
            // then big-endian julianDay.
            byte[] bytes = vector10.getBytes(colId).getBytes();
            assert bytes.length == 12;
            long nanoOfDay = 0;
            for (int i = 0; i < 8; i++) {
                nanoOfDay <<= 8;
                nanoOfDay |= (bytes[i] & (0xff));
            }
            int julianDay = 0;
            for (int i = 8; i < 12; i++) {
                julianDay <<= 8;
                julianDay |= (bytes[i] & (0xff));
            }
            // Days since epoch * millis-per-day plus the whole-millisecond
            // part of nanoOfDay; the remainder (< 1_000_000 ns) is carried
            // separately for nanosecond precision.
            long millisecond = (julianDay - DateTimeUtils.EPOCH_JULIAN) * DateTimeUtils.MILLIS_PER_DAY + nanoOfDay / 1000000;
            int nanoOfMillisecond = (int) (nanoOfDay % 1000000);
            return TimestampData.fromEpochMillis(millisecond, nanoOfMillisecond);
        }

        @Override
        public boolean isNullAt(int i) {
            return false;
        }
    };
    // col11: DECIMAL — unscaled long value i (filled after the anonymous
    // class captures the array; order is safe because reads happen only in
    // the assertion loop below).
    long[] vector11 = new long[VECTOR_SIZE];
    DecimalColumnVector col11 = new DecimalColumnVector() {

        @Override
        public boolean isNullAt(int i) {
            return false;
        }

        @Override
        public DecimalData getDecimal(int i, int precision, int scale) {
            return DecimalData.fromUnscaledLong(vector11[i], precision, scale);
        }
    };
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector11[i] = i;
    }
    // col12: ARRAY<INT> — one flat int vector; row i exposes the slice
    // [i*ARRAY_SIZE, (i+1)*ARRAY_SIZE) as its array.
    HeapIntVector col12Data = new HeapIntVector(VECTOR_SIZE * ARRAY_SIZE);
    for (int i = 0; i < VECTOR_SIZE * ARRAY_SIZE; i++) {
        col12Data.vector[i] = i;
    }
    ArrayColumnVector col12 = new ArrayColumnVector() {

        @Override
        public boolean isNullAt(int i) {
            return false;
        }

        @Override
        public ArrayData getArray(int i) {
            return new ColumnarArrayData(col12Data, i * ARRAY_SIZE, ARRAY_SIZE);
        }
    };
    // Assemble the batch and verify every column's typed accessor per row.
    VectorizedColumnBatch batch = new VectorizedColumnBatch(new ColumnVector[] { col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12 });
    batch.setNumRows(VECTOR_SIZE);
    for (int i = 0; i < batch.getNumRows(); i++) {
        ColumnarRowData row = new ColumnarRowData(batch, i);
        assertEquals(row.getBoolean(0), i % 2 == 0);
        assertEquals(row.getString(1).toString(), String.valueOf(i));
        assertEquals(row.getByte(2), (byte) i);
        assertEquals(row.getDouble(3), i, 0);
        assertEquals(row.getFloat(4), (float) i, 0);
        assertEquals(row.getInt(5), i);
        assertEquals(row.getLong(6), i);
        assertEquals(row.getShort(7), (short) i);
        // MILLIS and MICROS columns both encode exactly i milliseconds.
        assertEquals(row.getTimestamp(8, 3).getMillisecond(), i);
        assertEquals(row.getTimestamp(9, 6).getMillisecond(), i);
        // NANOS column: i seconds + 123456789 ns == i*1000+123 ms and
        // 456789 ns within the millisecond.
        assertEquals(row.getTimestamp(10, 9).getMillisecond(), i * 1000L + 123);
        assertEquals(row.getTimestamp(10, 9).getNanoOfMillisecond(), 456789);
        assertEquals(row.getDecimal(11, 10, 0).toUnscaledLong(), i);
        for (int j = 0; j < ARRAY_SIZE; j++) {
            assertEquals(row.getArray(12).getInt(j), i * ARRAY_SIZE + j);
        }
    }
    assertEquals(VECTOR_SIZE, batch.getNumRows());
}
Also used : HeapLongVector(org.apache.flink.table.data.columnar.vector.heap.HeapLongVector) HeapShortVector(org.apache.flink.table.data.columnar.vector.heap.HeapShortVector) HeapByteVector(org.apache.flink.table.data.columnar.vector.heap.HeapByteVector) HeapDoubleVector(org.apache.flink.table.data.columnar.vector.heap.HeapDoubleVector) ColumnarArrayData(org.apache.flink.table.data.columnar.ColumnarArrayData) HeapBytesVector(org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector) HeapIntVector(org.apache.flink.table.data.columnar.vector.heap.HeapIntVector) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HeapBooleanVector(org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector) ColumnarRowData(org.apache.flink.table.data.columnar.ColumnarRowData) HeapFloatVector(org.apache.flink.table.data.columnar.vector.heap.HeapFloatVector) Test(org.junit.Test)

Aggregations

ColumnarRowData (org.apache.flink.table.data.columnar.ColumnarRowData)6 Test (org.junit.Test)4 HeapIntVector (org.apache.flink.table.data.columnar.vector.heap.HeapIntVector)3 Path (org.apache.flink.core.fs.Path)2 BigIntType (org.apache.flink.table.types.logical.BigIntType)2 DoubleType (org.apache.flink.table.types.logical.DoubleType)2 IntType (org.apache.flink.table.types.logical.IntType)2 LogicalType (org.apache.flink.table.types.logical.LogicalType)2 SmallIntType (org.apache.flink.table.types.logical.SmallIntType)2 TinyIntType (org.apache.flink.table.types.logical.TinyIntType)2 Configuration (org.apache.hadoop.conf.Configuration)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 BigDecimal (java.math.BigDecimal)1 ArrayList (java.util.ArrayList)1 ColumnarArrayData (org.apache.flink.table.data.columnar.ColumnarArrayData)1 VectorizedColumnBatch (org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch)1 HeapBooleanVector (org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector)1 HeapByteVector (org.apache.flink.table.data.columnar.vector.heap.HeapByteVector)1 HeapBytesVector (org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector)1 HeapDoubleVector (org.apache.flink.table.data.columnar.vector.heap.HeapDoubleVector)1