
Example 1 with ColumnarRowData

Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.

From the class ParquetColumnarRowSplitReaderTest, method testProject.

@Test
public void testProject() throws IOException {
    // prepare parquet file
    int number = 1000;
    List<Row> records = new ArrayList<>(number);
    for (int i = 0; i < number; i++) {
        Integer v = i;
        records.add(newRow(v));
    }
    Path testPath = createTempParquetFile(TEMPORARY_FOLDER.newFolder(), PARQUET_SCHEMA, records, rowGroupSize);
    // test reader
    LogicalType[] fieldTypes = new LogicalType[] { new DoubleType(), new TinyIntType(), new IntType() };
    ParquetColumnarRowSplitReader reader =
            new ParquetColumnarRowSplitReader(
                    false,
                    true,
                    new Configuration(),
                    fieldTypes,
                    new String[] { "f7", "f2", "f4" },
                    VectorizedColumnBatch::new,
                    500,
                    new org.apache.hadoop.fs.Path(testPath.getPath()),
                    0,
                    Long.MAX_VALUE);
    int i = 0;
    while (!reader.reachedEnd()) {
        ColumnarRowData row = reader.nextRecord();
        assertEquals(i, row.getDouble(0), 0);
        assertEquals((byte) i, row.getByte(1));
        assertEquals(i, row.getInt(2));
        i++;
    }
    reader.close();
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) LogicalType(org.apache.flink.table.types.logical.LogicalType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) VectorizedColumnBatch(org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch) DoubleType(org.apache.flink.table.types.logical.DoubleType) ColumnarRowData(org.apache.flink.table.data.columnar.ColumnarRowData) Row(org.apache.flink.types.Row) Test(org.junit.Test)
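The drain loop in testProject is the general consumption pattern for this reader: reachedEnd() loads batches as needed and nextRecord() hands back a ColumnarRowData view positioned on the current row. A minimal sketch of that pattern factored into a reusable helper, using only the reader methods visible in these examples (the helper name forEachRow and the Consumer-based callback are illustrative, not part of the Flink API; it needs java.util.function.Consumer):

private static int forEachRow(ParquetColumnarRowSplitReader reader, Consumer<ColumnarRowData> consumer) throws IOException {
    try {
        int count = 0;
        while (!reader.reachedEnd()) {
            // The returned ColumnarRowData is a view over the reader's current batch,
            // so consume it before advancing rather than caching references to it.
            consumer.accept(reader.nextRecord());
            count++;
        }
        return count;
    } finally {
        reader.close();
    }
}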

Example 2 with ColumnarRowData

Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.

From the class ParquetColumnarRowSplitReaderTest, method innerTestPartitionValues.

private void innerTestPartitionValues(Path testPath, Map<String, Object> partSpec, boolean nullPartValue) throws IOException {
    LogicalType[] fieldTypes =
            new LogicalType[] {
                new VarCharType(VarCharType.MAX_LENGTH),
                new BooleanType(),
                new TinyIntType(),
                new SmallIntType(),
                new IntType(),
                new BigIntType(),
                new FloatType(),
                new DoubleType(),
                new TimestampType(9),
                new DecimalType(5, 0),
                new DecimalType(15, 0),
                new DecimalType(20, 0),
                new DecimalType(5, 0),
                new DecimalType(15, 0),
                new DecimalType(20, 0),
                new BooleanType(),
                new DateType(),
                new TimestampType(9),
                new DoubleType(),
                new TinyIntType(),
                new SmallIntType(),
                new IntType(),
                new BigIntType(),
                new FloatType(),
                new DecimalType(5, 0),
                new DecimalType(15, 0),
                new DecimalType(20, 0),
                new VarCharType(VarCharType.MAX_LENGTH)
            };
    ParquetColumnarRowSplitReader reader =
            ParquetSplitReaderUtil.genPartColumnarRowReader(
                    false,
                    true,
                    new Configuration(),
                    IntStream.range(0, 28).mapToObj(i -> "f" + i).toArray(String[]::new),
                    Arrays.stream(fieldTypes)
                            .map(TypeConversions::fromLogicalToDataType)
                            .toArray(DataType[]::new),
                    partSpec,
                    new int[] { 7, 2, 4, 15, 19, 20, 21, 22, 23, 18, 16, 17, 24, 25, 26, 27 },
                    rowGroupSize,
                    new Path(testPath.getPath()),
                    0,
                    Long.MAX_VALUE);
    int i = 0;
    while (!reader.reachedEnd()) {
        ColumnarRowData row = reader.nextRecord();
        // common values
        assertEquals(i, row.getDouble(0), 0);
        assertEquals((byte) i, row.getByte(1));
        assertEquals(i, row.getInt(2));
        // partition values
        if (nullPartValue) {
            for (int j = 3; j < 16; j++) {
                assertTrue(row.isNullAt(j));
            }
        } else {
            assertTrue(row.getBoolean(3));
            assertEquals(9, row.getByte(4));
            assertEquals(10, row.getShort(5));
            assertEquals(11, row.getInt(6));
            assertEquals(12, row.getLong(7));
            assertEquals(13, row.getFloat(8), 0);
            assertEquals(6.6, row.getDouble(9), 0);
            assertEquals(DateTimeUtils.toInternal(Date.valueOf("2020-11-23")), row.getInt(10));
            assertEquals(LocalDateTime.of(1999, 1, 1, 1, 1), row.getTimestamp(11, 9).toLocalDateTime());
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(24), 5, 0), row.getDecimal(12, 5, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(25), 15, 0), row.getDecimal(13, 15, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(26), 20, 0), row.getDecimal(14, 20, 0));
            assertEquals("f27", row.getString(15).toString());
        }
        i++;
    }
    reader.close();
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TypeConversions(org.apache.flink.table.types.utils.TypeConversions) BooleanType(org.apache.flink.table.types.logical.BooleanType) LogicalType(org.apache.flink.table.types.logical.LogicalType) BigIntType(org.apache.flink.table.types.logical.BigIntType) BigDecimal(java.math.BigDecimal) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) FloatType(org.apache.flink.table.types.logical.FloatType) DoubleType(org.apache.flink.table.types.logical.DoubleType) TimestampType(org.apache.flink.table.types.logical.TimestampType) DecimalType(org.apache.flink.table.types.logical.DecimalType) DataType(org.apache.flink.table.types.DataType) ColumnarRowData(org.apache.flink.table.data.columnar.ColumnarRowData) VarCharType(org.apache.flink.table.types.logical.VarCharType) DateType(org.apache.flink.table.types.logical.DateType)
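Two details in the assertions are easy to miss. First, the projection array { 7, 2, 4, 15, 19, ... } means output positions 0 to 2 come from file columns (f7, f2, f4) while positions 3 to 15 come from partition fields, which is why the checks switch from file data to partSpec values at index 3. Second, row.getInt(10) works for a DATE column because Flink's internal DATE representation is an int counting days since 1970-01-01. A quick cross-check of the 2020-11-23 literal using java.time.LocalDate, under the assumption that DateTimeUtils.toInternal follows the same epoch-day convention:

// Assumption: Flink's internal DATE value equals java.time's epoch-day count.
long epochDay = LocalDate.of(2020, 11, 23).toEpochDay(); // 18589
// If so, row.getInt(10) above should read 18589 for partition date 2020-11-23.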

Example 3 with ColumnarRowData

Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.

From the class VectorizedColumnBatchTest, method testDictionary.

@Test
public void testDictionary() {
    // dictionary-encoded values: ids 0 and 1 decode to 1998 and 9998
    HeapIntVector col = new HeapIntVector(VECTOR_SIZE);
    Integer[] dict = new Integer[2];
    dict[0] = 1998;
    dict[1] = 9998;
    col.setDictionary(new ColumnVectorTest.TestDictionary(dict));
    HeapIntVector heapIntVector = col.reserveDictionaryIds(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        heapIntVector.vector[i] = i % 2 == 0 ? 0 : 1;
    }
    VectorizedColumnBatch batch = new VectorizedColumnBatch(new ColumnVector[] { col });
    for (int i = 0; i < VECTOR_SIZE; i++) {
        ColumnarRowData row = new ColumnarRowData(batch, i);
        if (i % 2 == 0) {
            assertEquals(1998, row.getInt(0));
        } else {
            assertEquals(9998, row.getInt(0));
        }
    }
}
Also used : HeapIntVector(org.apache.flink.table.data.columnar.vector.heap.HeapIntVector) ColumnarRowData(org.apache.flink.table.data.columnar.ColumnarRowData) Test(org.junit.Test)
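The TestDictionary used above is defined in ColumnVectorTest and is not reproduced on this page. A minimal sketch of what a compatible integer-only dictionary could look like, assuming the Dictionary interface in org.apache.flink.table.data.columnar.vector (the method set below is my reading of that interface; verify against your Flink version, and note that unsupported decode paths simply throw):

import org.apache.flink.table.data.TimestampData;
import org.apache.flink.table.data.columnar.vector.Dictionary;

/** Illustrative int-only dictionary; decode methods for other types are unsupported. */
final class IntOnlyDictionary implements Dictionary {
    private final Integer[] values;

    IntOnlyDictionary(Integer[] values) {
        this.values = values;
    }

    @Override
    public int decodeToInt(int id) {
        // The dictionary id written into the vector selects an entry here.
        return values[id];
    }

    @Override
    public long decodeToLong(int id) {
        throw new UnsupportedOperationException();
    }

    @Override
    public float decodeToFloat(int id) {
        throw new UnsupportedOperationException();
    }

    @Override
    public double decodeToDouble(int id) {
        throw new UnsupportedOperationException();
    }

    @Override
    public byte[] decodeToBinary(int id) {
        throw new UnsupportedOperationException();
    }

    @Override
    public TimestampData decodeToTimestamp(int id) {
        throw new UnsupportedOperationException();
    }
}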

Example 4 with ColumnarRowData

Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.

From the class VectorizedColumnBatchTest, method testNull.

@Test
public void testNull() {
    // all null
    HeapIntVector col0 = new HeapIntVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col0.setNullAt(i);
    }
    // some null
    HeapIntVector col1 = new HeapIntVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        if (i % 2 == 0) {
            col1.setNullAt(i);
        } else {
            col1.vector[i] = i;
        }
    }
    VectorizedColumnBatch batch = new VectorizedColumnBatch(new ColumnVector[] { col0, col1 });
    for (int i = 0; i < VECTOR_SIZE; i++) {
        ColumnarRowData row = new ColumnarRowData(batch, i);
        assertTrue(row.isNullAt(0));
        if (i % 2 == 0) {
            assertTrue(row.isNullAt(1));
        } else {
            assertEquals(i, row.getInt(1));
        }
    }
}
Also used : HeapIntVector(org.apache.flink.table.data.columnar.vector.heap.HeapIntVector) ColumnarRowData(org.apache.flink.table.data.columnar.ColumnarRowData) Test(org.junit.Test)
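The useful generalization from testNull is that a heap vector's backing array still holds the Java default (0 for an int slot) where a null was set, so row.getInt at a null position would silently return 0 rather than fail. Consumers therefore gate typed getters behind isNullAt, as in this minimal sketch:

// Boxing to Integer makes the null case explicit instead of relying on 0
// as an in-band sentinel value.
Integer value = row.isNullAt(1) ? null : row.getInt(1);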

Example 5 with ColumnarRowData

Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.

From the class ParquetColumnarRowSplitReaderTest, method readSplitAndCheck.

private int readSplitAndCheck(int start, long seekToRow, Path testPath, long splitStart, long splitLength, List<Integer> values) throws IOException {
    ParquetColumnarRowSplitReader reader = createReader(testPath, splitStart, splitLength);
    reader.seekToRow(seekToRow);
    int i = start;
    while (!reader.reachedEnd()) {
        ColumnarRowData row = reader.nextRecord();
        Integer v = values.get(i);
        if (v == null) {
            for (int j = 0; j < 15; j++) {
                assertTrue(row.isNullAt(j));
            }
        } else {
            assertEquals("" + v, row.getString(0).toString());
            assertEquals(v % 2 == 0, row.getBoolean(1));
            assertEquals(v.byteValue(), row.getByte(2));
            assertEquals(v.shortValue(), row.getShort(3));
            assertEquals(v.intValue(), row.getInt(4));
            assertEquals(v.longValue(), row.getLong(5));
            assertEquals(v.floatValue(), row.getFloat(6), 0);
            assertEquals(v.doubleValue(), row.getDouble(7), 0);
            assertEquals(toDateTime(v), row.getTimestamp(8, 9).toLocalDateTime());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(9, 5, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(10, 15, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(11, 20, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(12, 5, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(13, 15, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(14, 20, 0).toBigDecimal());
        }
        i++;
    }
    reader.close();
    return i - start;
}
Also used : ColumnarRowData(org.apache.flink.table.data.columnar.ColumnarRowData)
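readSplitAndCheck combines split-scoped reading (splitStart plus splitLength), row skipping (seekToRow), and per-row verification against an expected values list, returning the number of rows consumed. The fixtures createReader and values are defined elsewhere in the test class; a hypothetical call, assuming values holds one expected entry per row of the file and the split covers the whole file:

// Skip the first 100 rows of a whole-file split and verify the remainder;
// the returned row count should then be values.size() - 100.
int consumed = readSplitAndCheck(100, 100, testPath, 0, Long.MAX_VALUE, values);
assertEquals(values.size() - 100, consumed);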

Aggregations

ColumnarRowData (org.apache.flink.table.data.columnar.ColumnarRowData): 6
Test (org.junit.Test): 4
HeapIntVector (org.apache.flink.table.data.columnar.vector.heap.HeapIntVector): 3
Path (org.apache.flink.core.fs.Path): 2
BigIntType (org.apache.flink.table.types.logical.BigIntType): 2
DoubleType (org.apache.flink.table.types.logical.DoubleType): 2
IntType (org.apache.flink.table.types.logical.IntType): 2
LogicalType (org.apache.flink.table.types.logical.LogicalType): 2
SmallIntType (org.apache.flink.table.types.logical.SmallIntType): 2
TinyIntType (org.apache.flink.table.types.logical.TinyIntType): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
BigDecimal (java.math.BigDecimal): 1
ArrayList (java.util.ArrayList): 1
ColumnarArrayData (org.apache.flink.table.data.columnar.ColumnarArrayData): 1
VectorizedColumnBatch (org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch): 1
HeapBooleanVector (org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector): 1
HeapByteVector (org.apache.flink.table.data.columnar.vector.heap.HeapByteVector): 1
HeapBytesVector (org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector): 1
HeapDoubleVector (org.apache.flink.table.data.columnar.vector.heap.HeapDoubleVector): 1