Search in sources :

Example 1 with ParquetColumnarRowSplitReader

Use of org.apache.flink.formats.parquet.vector.ParquetColumnarRowSplitReader in the Apache Flink project.

From the class ParquetRowDataWriterTest, method innerTest.

/**
 * Round-trips 1000 generated rows through the Parquet writer and the columnar split reader,
 * asserting that each row read back equals the row that was written at the same position and
 * that exactly as many rows come back as were written.
 *
 * @param conf configuration handed to both the writer factory and the reader
 * @param utcTimestamp flag forwarded unchanged to both the writer factory and the reader
 * @throws IOException if the temporary file cannot be written, read back, or closed
 */
private void innerTest(Configuration conf, boolean utcTimestamp) throws IOException {
    Path path = new Path(TEMPORARY_FOLDER.newFolder().getPath(), UUID.randomUUID().toString());
    int number = 1000;

    // Build the expected rows: every column is derived from the same counter value.
    List<Row> rows = new ArrayList<>(number);
    for (int i = 0; i < number; i++) {
        Integer v = i;
        rows.add(
                Row.of(
                        String.valueOf(v),
                        String.valueOf(v).getBytes(StandardCharsets.UTF_8),
                        v % 2 == 0,
                        v.byteValue(),
                        v.shortValue(),
                        v,
                        v.longValue(),
                        v.floatValue(),
                        v.doubleValue(),
                        toDateTime(v),
                        BigDecimal.valueOf(v),
                        BigDecimal.valueOf(v),
                        BigDecimal.valueOf(v)));
    }

    // Write all rows to a fresh file.
    ParquetWriterFactory<RowData> factory =
            ParquetRowDataBuilder.createWriterFactory(ROW_TYPE, conf, utcTimestamp);
    BulkWriter<RowData> writer =
            factory.create(path.getFileSystem().create(path, FileSystem.WriteMode.OVERWRITE));
    for (Row expected : rows) {
        writer.addElement(CONVERTER.toInternal(expected));
    }
    writer.flush();
    writer.finish();

    // Verify. The reader is opened in try-with-resources so it is released even when an
    // assertion fails partway through the file.
    // NOTE(review): the literal arguments `true` and `50` are presumably the case-sensitivity
    // flag and the batch size; confirm against ParquetSplitReaderUtil.
    int cnt = 0;
    try (ParquetColumnarRowSplitReader reader =
            ParquetSplitReaderUtil.genPartColumnarRowReader(
                    utcTimestamp,
                    true,
                    conf,
                    ROW_TYPE.getFieldNames().toArray(new String[0]),
                    ROW_TYPE.getChildren().stream()
                            .map(TypeConversions::fromLogicalToDataType)
                            .toArray(DataType[]::new),
                    new HashMap<>(),
                    IntStream.range(0, ROW_TYPE.getFieldCount()).toArray(),
                    50,
                    path,
                    0,
                    Long.MAX_VALUE)) {
        while (!reader.reachedEnd()) {
            Row actual = CONVERTER.toExternal(reader.nextRecord());
            Assert.assertEquals(rows.get(cnt), actual);
            cnt++;
        }
    }
    Assert.assertEquals(number, cnt);
}
Also used : Path(org.apache.flink.core.fs.Path) TypeConversions(org.apache.flink.table.types.utils.TypeConversions) ArrayList(java.util.ArrayList) ParquetColumnarRowSplitReader(org.apache.flink.formats.parquet.vector.ParquetColumnarRowSplitReader) RowData(org.apache.flink.table.data.RowData) DataType(org.apache.flink.table.types.DataType) Row(org.apache.flink.types.Row)

Aggregations

ArrayList (java.util.ArrayList)1 Path (org.apache.flink.core.fs.Path)1 ParquetColumnarRowSplitReader (org.apache.flink.formats.parquet.vector.ParquetColumnarRowSplitReader)1 RowData (org.apache.flink.table.data.RowData)1 DataType (org.apache.flink.table.types.DataType)1 TypeConversions (org.apache.flink.table.types.utils.TypeConversions)1 Row (org.apache.flink.types.Row)1