Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.
From the class ParquetColumnarRowSplitReaderTest, method testProject.
@Test
public void testProject() throws IOException {
    // prepare the parquet test file
    int number = 1000;
    List<Row> records = new ArrayList<>(number);
    for (int i = 0; i < number; i++) {
        Integer v = i;
        records.add(newRow(v));
    }
    Path testPath =
            createTempParquetFile(TEMPORARY_FOLDER.newFolder(), PARQUET_SCHEMA, records, rowGroupSize);

    // read back only the projected columns f7, f2 and f4
    LogicalType[] fieldTypes =
            new LogicalType[] {new DoubleType(), new TinyIntType(), new IntType()};
    ParquetColumnarRowSplitReader reader =
            new ParquetColumnarRowSplitReader(
                    false, // utcTimestamp
                    true, // case sensitive
                    new Configuration(), // Hadoop configuration
                    fieldTypes, // types of the projected fields
                    new String[] {"f7", "f2", "f4"}, // projected field names
                    VectorizedColumnBatch::new, // batch factory
                    500, // batch size (rows)
                    new org.apache.hadoop.fs.Path(testPath.getPath()),
                    0, // split start
                    Long.MAX_VALUE); // split length: covers the whole file
    int i = 0;
    while (!reader.reachedEnd()) {
        ColumnarRowData row = reader.nextRecord();
        assertEquals(i, row.getDouble(0), 0);
        assertEquals((byte) i, row.getByte(1));
        assertEquals(i, row.getInt(2));
        i++;
    }
    reader.close();
}
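One caveat worth noting: the ColumnarRowData returned by nextRecord() is typically a mutable view over the reader's current batch rather than a standalone copy, so field values should be materialized before the next call if they need to outlive the loop. A minimal sketch, assuming the same reader setup as above:

// Materialize values out of the row before advancing the reader; the row
// object itself may be reused for the next record.
List<Integer> f4Values = new ArrayList<>();
while (!reader.reachedEnd()) {
    ColumnarRowData row = reader.nextRecord();
    f4Values.add(row.isNullAt(2) ? null : row.getInt(2));
}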
Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.
From the class ParquetColumnarRowSplitReaderTest, method innerTestPartitionValues.
private void innerTestPartitionValues(
        Path testPath, Map<String, Object> partSpec, boolean nullPartValue) throws IOException {
    LogicalType[] fieldTypes =
            new LogicalType[] {
                new VarCharType(VarCharType.MAX_LENGTH), new BooleanType(),
                new TinyIntType(), new SmallIntType(), new IntType(), new BigIntType(),
                new FloatType(), new DoubleType(), new TimestampType(9),
                new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0),
                new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0),
                new BooleanType(), new DateType(), new TimestampType(9),
                new DoubleType(), new TinyIntType(), new SmallIntType(), new IntType(),
                new BigIntType(), new FloatType(),
                new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0),
                new VarCharType(VarCharType.MAX_LENGTH)
            };
    ParquetColumnarRowSplitReader reader =
            ParquetSplitReaderUtil.genPartColumnarRowReader(
                    false, // utcTimestamp
                    true, // case sensitive
                    new Configuration(),
                    IntStream.range(0, 28).mapToObj(i -> "f" + i).toArray(String[]::new),
                    Arrays.stream(fieldTypes)
                            .map(TypeConversions::fromLogicalToDataType)
                            .toArray(DataType[]::new),
                    partSpec, // partition name -> value
                    new int[] {7, 2, 4, 15, 19, 20, 21, 22, 23, 18, 16, 17, 24, 25, 26, 27},
                    rowGroupSize,
                    new Path(testPath.getPath()),
                    0,
                    Long.MAX_VALUE);
    int i = 0;
    while (!reader.reachedEnd()) {
        ColumnarRowData row = reader.nextRecord();
        // common values
        assertEquals(i, row.getDouble(0), 0);
        assertEquals((byte) i, row.getByte(1));
        assertEquals(i, row.getInt(2));
        // partition values
        if (nullPartValue) {
            for (int j = 3; j < 16; j++) {
                assertTrue(row.isNullAt(j));
            }
        } else {
            assertTrue(row.getBoolean(3));
            assertEquals(9, row.getByte(4));
            assertEquals(10, row.getShort(5));
            assertEquals(11, row.getInt(6));
            assertEquals(12, row.getLong(7));
            assertEquals(13, row.getFloat(8), 0);
            assertEquals(6.6, row.getDouble(9), 0);
            assertEquals(DateTimeUtils.toInternal(Date.valueOf("2020-11-23")), row.getInt(10));
            assertEquals(
                    LocalDateTime.of(1999, 1, 1, 1, 1),
                    row.getTimestamp(11, 9).toLocalDateTime());
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(24), 5, 0), row.getDecimal(12, 5, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(25), 15, 0), row.getDecimal(13, 15, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(26), 20, 0), row.getDecimal(14, 20, 0));
            assertEquals("f27", row.getString(15).toString());
        }
        i++;
    }
    reader.close();
}
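The non-null assertions above imply a partSpec along the following lines. This is a reconstruction from the expected values and the fN field naming, not the test's actual fixture, so treat the exact keys and value types as assumptions:

// Hypothetical partSpec matching the assertions above; reconstructed from
// the expected values, not copied from the test's real setup.
Map<String, Object> partSpec = new HashMap<>();
partSpec.put("f15", true);                               // row.getBoolean(3)
partSpec.put("f19", (byte) 9);                           // row.getByte(4)
partSpec.put("f20", (short) 10);                         // row.getShort(5)
partSpec.put("f21", 11);                                 // row.getInt(6)
partSpec.put("f22", 12L);                                // row.getLong(7)
partSpec.put("f23", 13f);                                // row.getFloat(8)
partSpec.put("f18", 6.6);                                // row.getDouble(9)
partSpec.put("f16", Date.valueOf("2020-11-23"));         // row.getInt(10), epoch days
partSpec.put("f17", LocalDateTime.of(1999, 1, 1, 1, 1)); // row.getTimestamp(11, 9)
partSpec.put("f24", new BigDecimal(24));                 // row.getDecimal(12, 5, 0)
partSpec.put("f25", new BigDecimal(25));                 // row.getDecimal(13, 15, 0)
partSpec.put("f26", new BigDecimal(26));                 // row.getDecimal(14, 20, 0)
partSpec.put("f27", "f27");                              // row.getString(15)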
Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.
From the class VectorizedColumnBatchTest, method testDictionary.
@Test
public void testDictionary() {
    // a dictionary-encoded int column: the vector stores ids into the dictionary
    HeapIntVector col = new HeapIntVector(VECTOR_SIZE);
    Integer[] dict = new Integer[2];
    dict[0] = 1998;
    dict[1] = 9998;
    col.setDictionary(new ColumnVectorTest.TestDictionary(dict));
    HeapIntVector heapIntVector = col.reserveDictionaryIds(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        heapIntVector.vector[i] = i % 2 == 0 ? 0 : 1;
    }
    VectorizedColumnBatch batch = new VectorizedColumnBatch(new ColumnVector[] {col});
    for (int i = 0; i < VECTOR_SIZE; i++) {
        ColumnarRowData row = new ColumnarRowData(batch, i);
        if (i % 2 == 0) {
            assertEquals(1998, row.getInt(0));
        } else {
            assertEquals(9998, row.getInt(0));
        }
    }
}
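ColumnVectorTest.TestDictionary is not shown on this page. A minimal version would simply resolve dictionary ids against the backing array; the sketch below assumes Flink's Dictionary interface from the columnar vector package, and the exact method set of that interface is an assumption rather than verified API:

// Hypothetical minimal dictionary: resolves a dictionary id to its value.
// The Dictionary interface and the decodeToInt signature are assumptions.
final class TestDictionary implements Dictionary {
    private final Object[] entries;

    TestDictionary(Object[] entries) {
        this.entries = entries;
    }

    @Override
    public int decodeToInt(int id) {
        return (Integer) entries[id]; // id 0 -> 1998, id 1 -> 9998 in the test above
    }

    // the interface's other decode* methods would cast entries[id] analogously
}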
Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.
From the class VectorizedColumnBatchTest, method testNull.
@Test
public void testNull() {
    // all null
    HeapIntVector col0 = new HeapIntVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        col0.setNullAt(i);
    }
    // some null
    HeapIntVector col1 = new HeapIntVector(VECTOR_SIZE);
    for (int i = 0; i < VECTOR_SIZE; i++) {
        if (i % 2 == 0) {
            col1.setNullAt(i);
        } else {
            col1.vector[i] = i;
        }
    }
    VectorizedColumnBatch batch = new VectorizedColumnBatch(new ColumnVector[] {col0, col1});
    for (int i = 0; i < VECTOR_SIZE; i++) {
        ColumnarRowData row = new ColumnarRowData(batch, i);
        assertTrue(row.isNullAt(0));
        if (i % 2 == 0) {
            assertTrue(row.isNullAt(1));
        } else {
            assertEquals(i, row.getInt(1));
        }
    }
}
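The test exercises the contract that callers must check isNullAt before reading a value, since the content of a null slot is undefined. A small illustrative helper, not part of Flink's API, makes that pattern explicit:

// Illustrative helper (hypothetical, not Flink API): box an int column value,
// returning null when the slot is null instead of reading undefined data.
static Integer getIntOrNull(ColumnarRowData row, int pos) {
    return row.isNullAt(pos) ? null : row.getInt(pos);
}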
Use of org.apache.flink.table.data.columnar.ColumnarRowData in project flink by apache.
From the class ParquetColumnarRowSplitReaderTest, method readSplitAndCheck.
private int readSplitAndCheck(
        int start,
        long seekToRow,
        Path testPath,
        long splitStart,
        long splitLength,
        List<Integer> values) throws IOException {
    ParquetColumnarRowSplitReader reader = createReader(testPath, splitStart, splitLength);
    reader.seekToRow(seekToRow);
    int i = start;
    while (!reader.reachedEnd()) {
        ColumnarRowData row = reader.nextRecord();
        Integer v = values.get(i);
        if (v == null) {
            // a null source value means every one of the 15 fields is null
            for (int j = 0; j < 15; j++) {
                assertTrue(row.isNullAt(j));
            }
        } else {
            assertEquals("" + v, row.getString(0).toString());
            assertEquals(v % 2 == 0, row.getBoolean(1));
            assertEquals(v.byteValue(), row.getByte(2));
            assertEquals(v.shortValue(), row.getShort(3));
            assertEquals(v.intValue(), row.getInt(4));
            assertEquals(v.longValue(), row.getLong(5));
            assertEquals(v.floatValue(), row.getFloat(6), 0);
            assertEquals(v.doubleValue(), row.getDouble(7), 0);
            assertEquals(toDateTime(v), row.getTimestamp(8, 9).toLocalDateTime());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(9, 5, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(10, 15, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(11, 20, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(12, 5, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(13, 15, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(14, 20, 0).toBigDecimal());
        }
        i++;
    }
    reader.close();
    return i - start;
}
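A typical caller divides the file into byte-range splits and checks that the splits together yield every row exactly once. A minimal sketch of such a call, assuming Flink's org.apache.flink.core.fs Path/FileSystem API for obtaining the file length and a values list covering all rows:

// Hypothetical usage: read the file as two byte-range splits and verify
// that together they cover all rows exactly once.
long fileLen = testPath.getFileSystem().getFileStatus(testPath).getLen();
long mid = fileLen / 2;
int firstHalf = readSplitAndCheck(0, 0, testPath, 0, mid, values);
int secondHalf = readSplitAndCheck(firstHalf, 0, testPath, mid, fileLen - mid, values);
assertEquals(values.size(), firstHalf + secondHalf);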