Use of org.apache.flink.formats.parquet.vector.ParquetColumnarRowSplitReader in project flink by apache.
Example: the innerTest method of the ParquetRowDataWriterTest class. It writes 1000 rows to a Parquet file through ParquetRowDataBuilder, then reads them back with ParquetColumnarRowSplitReader to verify the round trip.
private void innerTest(Configuration conf, boolean utcTimestamp) throws IOException {
    Path path = new Path(TEMPORARY_FOLDER.newFolder().getPath(), UUID.randomUUID().toString());
    int number = 1000;
    // Build test rows covering every field type in ROW_TYPE.
    List<Row> rows = new ArrayList<>(number);
    for (int i = 0; i < number; i++) {
        Integer v = i;
        rows.add(Row.of(
                String.valueOf(v), String.valueOf(v).getBytes(StandardCharsets.UTF_8),
                v % 2 == 0, v.byteValue(), v.shortValue(), v, v.longValue(),
                v.floatValue(), v.doubleValue(), toDateTime(v),
                BigDecimal.valueOf(v), BigDecimal.valueOf(v), BigDecimal.valueOf(v)));
    }
    // Write the rows as Parquet with the writer under test.
    ParquetWriterFactory<RowData> factory =
            ParquetRowDataBuilder.createWriterFactory(ROW_TYPE, conf, utcTimestamp);
    BulkWriter<RowData> writer =
            factory.create(path.getFileSystem().create(path, FileSystem.WriteMode.OVERWRITE));
    for (int i = 0; i < number; i++) {
        writer.addElement(CONVERTER.toInternal(rows.get(i)));
    }
    writer.flush();
    writer.finish();
    // Verify: read the file back with ParquetColumnarRowSplitReader and compare row by row.
    ParquetColumnarRowSplitReader reader = ParquetSplitReaderUtil.genPartColumnarRowReader(
            utcTimestamp,
            true, // case sensitive
            conf,
            ROW_TYPE.getFieldNames().toArray(new String[0]),
            ROW_TYPE.getChildren().stream()
                    .map(TypeConversions::fromLogicalToDataType)
                    .toArray(DataType[]::new),
            new HashMap<>(), // no partition columns
            IntStream.range(0, ROW_TYPE.getFieldCount()).toArray(), // select all fields
            50, // batch size
            path,
            0, // split start
            Long.MAX_VALUE); // split length
    int cnt = 0;
    while (!reader.reachedEnd()) {
        Row row = CONVERTER.toExternal(reader.nextRecord());
        Assert.assertEquals(rows.get(cnt), row);
        cnt++;
    }
    Assert.assertEquals(number, cnt);
}
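The method refers to several members defined elsewhere in ParquetRowDataWriterTest: TEMPORARY_FOLDER, ROW_TYPE, CONVERTER, and toDateTime. Below is a minimal sketch of how those members could look so the snippet reads self-contained; the 13 logical types line up with the 13 values passed to Row.of, but the exact decimal precisions and the toDateTime body are illustrative assumptions, not copied from the Flink source.

import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.util.DataFormatConverters;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.table.types.utils.TypeConversions;
import org.apache.flink.types.Row;
import org.junit.ClassRule;
import org.junit.rules.TemporaryFolder;
import java.time.LocalDateTime;

@ClassRule
public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();

// One logical type per Row.of argument: string, bytes, boolean, byte, short,
// int, long, float, double, timestamp, and three decimals (precisions assumed).
private static final RowType ROW_TYPE =
        RowType.of(
                new VarCharType(VarCharType.MAX_LENGTH),
                new VarBinaryType(VarBinaryType.MAX_LENGTH),
                new BooleanType(),
                new TinyIntType(),
                new SmallIntType(),
                new IntType(),
                new BigIntType(),
                new FloatType(),
                new DoubleType(),
                new TimestampType(9),
                new DecimalType(5, 0),
                new DecimalType(15, 0),
                new DecimalType(20, 0));

// Bridges external Row objects to the internal RowData representation.
@SuppressWarnings({"unchecked", "rawtypes"})
private static final DataFormatConverters.DataFormatConverter<RowData, Row> CONVERTER =
        DataFormatConverters.getConverterForDataType(
                TypeConversions.fromLogicalToDataType(ROW_TYPE));

// Deterministic timestamp per row index (placeholder logic, assumed).
private static LocalDateTime toDateTime(Integer v) {
    return LocalDateTime.of(2020, 1, 1, 0, 0, 0).plusSeconds(v);
}

A driving test would then exercise both timestamp modes, e.g. innerTest(new Configuration(), true) and innerTest(new Configuration(), false). The Configuration here is org.apache.hadoop.conf.Configuration, which is the type ParquetRowDataBuilder.createWriterFactory and ParquetSplitReaderUtil.genPartColumnarRowReader accept.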