
Example 16 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

The class ParquetColumnarRowInputFormatTest, method testReadingSplit:

private int testReadingSplit(
        List<Integer> expected, Path path, long splitStart, long splitLength, long seekToRow)
        throws IOException {
    LogicalType[] fieldTypes = new LogicalType[] {
        new VarCharType(VarCharType.MAX_LENGTH), new BooleanType(), new TinyIntType(),
        new SmallIntType(), new IntType(), new BigIntType(), new FloatType(),
        new DoubleType(), new TimestampType(9), new DecimalType(5, 0),
        new DecimalType(15, 0), new DecimalType(20, 0), new DecimalType(5, 0),
        new DecimalType(15, 0), new DecimalType(20, 0)
    };
    ParquetColumnarRowInputFormat<FileSourceSplit> format = new ParquetColumnarRowInputFormat<>(
            new Configuration(),
            RowType.of(fieldTypes, new String[] {
                "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
                "f8", "f9", "f10", "f11", "f12", "f13", "f14"
            }),
            null, 500, false, true);
    // validate java serialization
    try {
        InstantiationUtil.clone(format);
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    FileStatus fileStatus = path.getFileSystem().getFileStatus(path);
    // restore a reader on the given split, seeking to row seekToRow within it
    BulkFormat.Reader<RowData> reader = format.restoreReader(
            EMPTY_CONF,
            new FileSourceSplit(
                    "id", path, splitStart, splitLength,
                    fileStatus.getModificationTime(), fileStatus.getLen(),
                    new String[0],
                    new CheckpointedPosition(CheckpointedPosition.NO_OFFSET, seekToRow)));
    AtomicInteger cnt = new AtomicInteger(0);
    final AtomicReference<RowData> previousRow = new AtomicReference<>();
    forEachRemaining(reader, row -> {
        if (previousRow.get() == null) {
            previousRow.set(row);
        } else {
            // ParquetColumnarRowInputFormat reuses a single row instance across records.
            assertSame(previousRow.get(), row);
        }
        Integer v = expected.get(cnt.get());
        if (v == null) {
            // all 15 fields should be null for a null record
            for (int f = 0; f < 15; f++) {
                assertTrue(row.isNullAt(f));
            }
        } else {
            assertEquals("" + v, row.getString(0).toString());
            assertEquals(v % 2 == 0, row.getBoolean(1));
            assertEquals(v.byteValue(), row.getByte(2));
            assertEquals(v.shortValue(), row.getShort(3));
            assertEquals(v.intValue(), row.getInt(4));
            assertEquals(v.longValue(), row.getLong(5));
            assertEquals(v.floatValue(), row.getFloat(6), 0);
            assertEquals(v.doubleValue(), row.getDouble(7), 0);
            assertEquals(toDateTime(v), row.getTimestamp(8, 9).toLocalDateTime());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(9, 5, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(10, 15, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(11, 20, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(12, 5, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(13, 15, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(14, 20, 0).toBigDecimal());
        }
        cnt.incrementAndGet();
    });
    return cnt.get();
}
Also used: FileStatus(org.apache.flink.core.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) LogicalType(org.apache.flink.table.types.logical.LogicalType) BigIntType(org.apache.flink.table.types.logical.BigIntType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) FloatType(org.apache.flink.table.types.logical.FloatType) RowData(org.apache.flink.table.data.RowData) CheckpointedPosition(org.apache.flink.connector.file.src.util.CheckpointedPosition) TimestampType(org.apache.flink.table.types.logical.TimestampType) VarCharType(org.apache.flink.table.types.logical.VarCharType) BooleanType(org.apache.flink.table.types.logical.BooleanType) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DoubleType(org.apache.flink.table.types.logical.DoubleType) DecimalType(org.apache.flink.table.types.logical.DecimalType) BulkFormat(org.apache.flink.connector.file.src.reader.BulkFormat)
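
The forEachRemaining helper used above is not shown on this page. A minimal sketch of what it plausibly looks like, assuming it drains a BulkFormat.Reader batch by batch and hands each record to a consumer; the helper name matches the call sites here, but the body is a reconstruction (RecordAndPosition lives in org.apache.flink.connector.file.src.util, Consumer in java.util.function):

private static void forEachRemaining(
        BulkFormat.Reader<RowData> reader, Consumer<RowData> consumer) throws IOException {
    BulkFormat.RecordIterator<RowData> batch;
    while ((batch = reader.readBatch()) != null) {
        RecordAndPosition<RowData> record;
        while ((record = batch.next()) != null) {
            // hand the (possibly reused) row to the test assertions
            consumer.accept(record.getRecord());
        }
        // release the batch so the format can recycle its buffers
        batch.releaseBatch();
    }
    reader.close();
}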

Example 17 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

The class ParquetColumnarRowInputFormatTest, method testProjection:

@Test
public void testProjection() throws IOException {
    int number = 1000;
    List<Row> records = new ArrayList<>(number);
    for (int i = 0; i < number; i++) {
        Integer v = i;
        records.add(newRow(v));
    }
    Path testPath = createTempParquetFile(TEMPORARY_FOLDER.newFolder(), PARQUET_SCHEMA, records, rowGroupSize);
    // test reader
    LogicalType[] fieldTypes = new LogicalType[] { new DoubleType(), new TinyIntType(), new IntType() };
    ParquetColumnarRowInputFormat<FileSourceSplit> format = new ParquetColumnarRowInputFormat<>(
            new Configuration(),
            RowType.of(fieldTypes, new String[] { "f7", "f2", "f4" }),
            null, 500, false, true);
    AtomicInteger cnt = new AtomicInteger(0);
    forEachRemaining(
            format.createReader(
                    EMPTY_CONF,
                    new FileSourceSplit("id", testPath, 0, Long.MAX_VALUE, 0, Long.MAX_VALUE)),
            row -> {
        int i = cnt.get();
        assertEquals(i, row.getDouble(0), 0);
        assertEquals((byte) i, row.getByte(1));
        assertEquals(i, row.getInt(2));
        cnt.incrementAndGet();
    });
}
Also used: Path(org.apache.flink.core.fs.Path) PartitionPathUtils.generatePartitionPath(org.apache.flink.table.utils.PartitionPathUtils.generatePartitionPath) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) LogicalType(org.apache.flink.table.types.logical.LogicalType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DoubleType(org.apache.flink.table.types.logical.DoubleType) Row(org.apache.flink.types.Row) Test(org.junit.Test)
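
The newRow(v) helper that produces the test records is also not shown in this excerpt. A hypothetical reconstruction, inferred purely from the column types and assertions in testReadingSplit (Example 16); the real test helper may differ, for instance in how it builds the timestamp and decimal values:

private Row newRow(Integer v) {
    return Row.of(
            "" + v,                 // f0: VARCHAR
            v % 2 == 0,             // f1: BOOLEAN
            v.byteValue(),          // f2: TINYINT
            v.shortValue(),         // f3: SMALLINT
            v,                      // f4: INT
            v.longValue(),          // f5: BIGINT
            v.floatValue(),         // f6: FLOAT
            v.doubleValue(),        // f7: DOUBLE
            toDateTime(v),          // f8: TIMESTAMP(9)
            BigDecimal.valueOf(v),  // f9:  DECIMAL(5, 0)
            BigDecimal.valueOf(v),  // f10: DECIMAL(15, 0)
            BigDecimal.valueOf(v),  // f11: DECIMAL(20, 0)
            BigDecimal.valueOf(v),  // f12: DECIMAL(5, 0)
            BigDecimal.valueOf(v),  // f13: DECIMAL(15, 0)
            BigDecimal.valueOf(v)); // f14: DECIMAL(20, 0)
}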

Example 18 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

The class ParquetColumnarRowInputFormatTest, method testProjectionReadUnknownField:

@Test
public void testProjectionReadUnknownField() throws IOException {
    int number = 1000;
    List<Row> records = new ArrayList<>(number);
    for (int i = 0; i < number; i++) {
        Integer v = i;
        records.add(newRow(v));
    }
    Path testPath = createTempParquetFile(TEMPORARY_FOLDER.newFolder(), PARQUET_SCHEMA, records, rowGroupSize);
    // test reader
    LogicalType[] fieldTypes = new LogicalType[] { new DoubleType(), new TinyIntType(), new IntType(), new VarCharType() };
    // f99 does not exist in the parquet file, so it is read back as null.
    ParquetColumnarRowInputFormat<FileSourceSplit> format = new ParquetColumnarRowInputFormat<>(
            new Configuration(),
            RowType.of(fieldTypes, new String[] { "f7", "f2", "f4", "f99" }),
            null, 500, false, true);
    AtomicInteger cnt = new AtomicInteger(0);
    forEachRemaining(
            format.createReader(
                    EMPTY_CONF,
                    new FileSourceSplit("id", testPath, 0, Long.MAX_VALUE, 0, Long.MAX_VALUE)),
            row -> {
        int i = cnt.get();
        assertEquals(i, row.getDouble(0), 0);
        assertEquals((byte) i, row.getByte(1));
        assertEquals(i, row.getInt(2));
        assertTrue(row.isNullAt(3));
        cnt.incrementAndGet();
    });
}
Also used: Path(org.apache.flink.core.fs.Path) PartitionPathUtils.generatePartitionPath(org.apache.flink.table.utils.PartitionPathUtils.generatePartitionPath) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) LogicalType(org.apache.flink.table.types.logical.LogicalType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DoubleType(org.apache.flink.table.types.logical.DoubleType) Row(org.apache.flink.types.Row) VarCharType(org.apache.flink.table.types.logical.VarCharType) Test(org.junit.Test)

Example 19 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

The class ParquetSplitReaderUtil, method genPartColumnarRowReader:

/**
 * Util for generating partitioned {@link ParquetColumnarRowSplitReader}.
 */
public static ParquetColumnarRowSplitReader genPartColumnarRowReader(
        boolean utcTimestamp,
        boolean caseSensitive,
        Configuration conf,
        String[] fullFieldNames,
        DataType[] fullFieldTypes,
        Map<String, Object> partitionSpec,
        int[] selectedFields,
        int batchSize,
        Path path,
        long splitStart,
        long splitLength)
        throws IOException {
    List<String> nonPartNames =
            Arrays.stream(fullFieldNames)
                    .filter(n -> !partitionSpec.containsKey(n))
                    .collect(Collectors.toList());
    List<String> selNonPartNames =
            Arrays.stream(selectedFields)
                    .mapToObj(i -> fullFieldNames[i])
                    .filter(nonPartNames::contains)
                    .collect(Collectors.toList());
    int[] selParquetFields = selNonPartNames.stream().mapToInt(nonPartNames::indexOf).toArray();
    ParquetColumnarRowSplitReader.ColumnBatchGenerator gen = readVectors -> {
        // create and initialize the row batch
        ColumnVector[] vectors = new ColumnVector[selectedFields.length];
        for (int i = 0; i < vectors.length; i++) {
            String name = fullFieldNames[selectedFields[i]];
            LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType();
            vectors[i] = partitionSpec.containsKey(name) ? createVectorFromConstant(type, partitionSpec.get(name), batchSize) : readVectors[selNonPartNames.indexOf(name)];
        }
        return new VectorizedColumnBatch(vectors);
    };
    return new ParquetColumnarRowSplitReader(
            utcTimestamp,
            caseSensitive,
            conf,
            Arrays.stream(selParquetFields)
                    .mapToObj(i -> fullFieldTypes[i].getLogicalType())
                    .toArray(LogicalType[]::new),
            selNonPartNames.toArray(new String[0]),
            gen,
            batchSize,
            new org.apache.hadoop.fs.Path(path.toUri()),
            splitStart,
            splitLength);
}
Also used: PrimitiveType(org.apache.parquet.schema.PrimitiveType) HeapTimestampVector(org.apache.flink.table.data.columnar.vector.heap.HeapTimestampVector) DataType(org.apache.flink.table.types.DataType) Arrays(java.util.Arrays) Preconditions.checkArgument(org.apache.parquet.Preconditions.checkArgument) FloatColumnReader(org.apache.flink.formats.parquet.vector.reader.FloatColumnReader) VectorizedColumnBatch(org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch) PageReader(org.apache.parquet.column.page.PageReader) HeapLongVector(org.apache.flink.table.data.columnar.vector.heap.HeapLongVector) HeapFloatVector(org.apache.flink.table.data.columnar.vector.heap.HeapFloatVector) IntColumnReader(org.apache.flink.formats.parquet.vector.reader.IntColumnReader) BigDecimal(java.math.BigDecimal) BytesColumnReader(org.apache.flink.formats.parquet.vector.reader.BytesColumnReader) DecimalType(org.apache.flink.table.types.logical.DecimalType) Path(org.apache.flink.core.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) ColumnVector(org.apache.flink.table.data.columnar.vector.ColumnVector) TimestampColumnReader(org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader) Preconditions(org.apache.flink.util.Preconditions) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) ShortColumnReader(org.apache.flink.formats.parquet.vector.reader.ShortColumnReader) WritableColumnVector(org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector) List(java.util.List) LogicalType(org.apache.flink.table.types.logical.LogicalType) HeapDoubleVector(org.apache.flink.table.data.columnar.vector.heap.HeapDoubleVector) DateTimeUtils.toInternal(org.apache.flink.table.utils.DateTimeUtils.toInternal) HeapIntVector(org.apache.flink.table.data.columnar.vector.heap.HeapIntVector) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) FixedLenBytesColumnReader(org.apache.flink.formats.parquet.vector.reader.FixedLenBytesColumnReader) LocalDate(java.time.LocalDate) BooleanColumnReader(org.apache.flink.formats.parquet.vector.reader.BooleanColumnReader) ColumnReader(org.apache.flink.formats.parquet.vector.reader.ColumnReader) IntType(org.apache.flink.table.types.logical.IntType) LocalDateTime(java.time.LocalDateTime) HeapBytesVector(org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector) ParquetSchemaConverter(org.apache.flink.formats.parquet.utils.ParquetSchemaConverter) HeapByteVector(org.apache.flink.table.data.columnar.vector.heap.HeapByteVector) OriginalType(org.apache.parquet.schema.OriginalType) TimestampData(org.apache.flink.table.data.TimestampData) BigIntType(org.apache.flink.table.types.logical.BigIntType) ByteColumnReader(org.apache.flink.formats.parquet.vector.reader.ByteColumnReader) LongColumnReader(org.apache.flink.formats.parquet.vector.reader.LongColumnReader) DecimalData(org.apache.flink.table.data.DecimalData) IOException(java.io.IOException) Date(java.sql.Date) DoubleColumnReader(org.apache.flink.formats.parquet.vector.reader.DoubleColumnReader) HeapShortVector(org.apache.flink.table.data.columnar.vector.heap.HeapShortVector) VarBinaryType(org.apache.flink.table.types.logical.VarBinaryType) HeapBooleanVector(org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector)
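
A hypothetical call site for genPartColumnarRowReader, reading a file from a Hive-style partition dt=2021-01-01 where the partition column is not stored in the parquet file. The field names, types, and the path/fileLen variables are illustrative assumptions; DataTypes here refers to org.apache.flink.table.api.DataTypes:

Map<String, Object> partitionSpec = new HashMap<>();
partitionSpec.put("dt", "2021-01-01");
ParquetColumnarRowSplitReader reader = ParquetSplitReaderUtil.genPartColumnarRowReader(
        /* utcTimestamp */ true,
        /* caseSensitive */ true,
        new Configuration(),
        new String[] { "f0", "f1", "dt" },
        new DataType[] { DataTypes.INT(), DataTypes.BIGINT(), DataTypes.STRING() },
        partitionSpec,
        /* selectedFields */ new int[] { 0, 2 }, // f0 comes from the file, dt from the spec
        /* batchSize */ 500,
        path, 0, fileLen);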

Example 20 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

The class JdbcOutputFormatBuilder, method createBufferReduceExecutor:

private static JdbcBatchStatementExecutor<RowData> createBufferReduceExecutor(
        JdbcDmlOptions opt,
        RuntimeContext ctx,
        TypeInformation<RowData> rowDataTypeInfo,
        LogicalType[] fieldTypes) {
    checkArgument(opt.getKeyFields().isPresent());
    JdbcDialect dialect = opt.getDialect();
    String tableName = opt.getTableName();
    String[] pkNames = opt.getKeyFields().get();
    // map the primary-key names to their positions and logical types in the full schema
    int[] pkFields =
            Arrays.stream(pkNames).mapToInt(Arrays.asList(opt.getFieldNames())::indexOf).toArray();
    LogicalType[] pkTypes =
            Arrays.stream(pkFields).mapToObj(f -> fieldTypes[f]).toArray(LogicalType[]::new);
    final TypeSerializer<RowData> typeSerializer =
            rowDataTypeInfo.createSerializer(ctx.getExecutionConfig());
    // when object reuse is enabled, rows must be deep-copied before buffering
    final Function<RowData, RowData> valueTransform =
            ctx.getExecutionConfig().isObjectReuseEnabled()
                    ? typeSerializer::copy
                    : Function.identity();
    return new TableBufferReducedStatementExecutor(
            createUpsertRowExecutor(
                    dialect, tableName, opt.getFieldNames(), fieldTypes, pkFields, pkNames, pkTypes),
            createDeleteExecutor(dialect, tableName, pkNames, pkTypes),
            createRowKeyExtractor(fieldTypes, pkFields),
            valueTransform);
}
Also used: DataType(org.apache.flink.table.types.DataType) Arrays(java.util.Arrays) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) JdbcExecutionOptions(org.apache.flink.connector.jdbc.JdbcExecutionOptions) JdbcDmlOptions(org.apache.flink.connector.jdbc.internal.options.JdbcDmlOptions) JdbcRowConverter(org.apache.flink.connector.jdbc.converter.JdbcRowConverter) RowType(org.apache.flink.table.types.logical.RowType) Function(java.util.function.Function) FieldNamedPreparedStatement(org.apache.flink.connector.jdbc.statement.FieldNamedPreparedStatement) RowData.createFieldGetter(org.apache.flink.table.data.RowData.createFieldGetter) TableSimpleStatementExecutor(org.apache.flink.connector.jdbc.internal.executor.TableSimpleStatementExecutor) GenericRowData(org.apache.flink.table.data.GenericRowData) JdbcBatchStatementExecutor(org.apache.flink.connector.jdbc.internal.executor.JdbcBatchStatementExecutor) TableBufferReducedStatementExecutor(org.apache.flink.connector.jdbc.internal.executor.TableBufferReducedStatementExecutor) TableBufferedStatementExecutor(org.apache.flink.connector.jdbc.internal.executor.TableBufferedStatementExecutor) SimpleJdbcConnectionProvider(org.apache.flink.connector.jdbc.internal.connection.SimpleJdbcConnectionProvider) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) RowData(org.apache.flink.table.data.RowData) JdbcOutputFormat(org.apache.flink.connector.jdbc.internal.JdbcOutputFormat) JdbcConnectorOptions(org.apache.flink.connector.jdbc.internal.options.JdbcConnectorOptions) Serializable(java.io.Serializable) LogicalType(org.apache.flink.table.types.logical.LogicalType) Preconditions.checkArgument(org.apache.flink.util.Preconditions.checkArgument) TableInsertOrUpdateStatementExecutor(org.apache.flink.connector.jdbc.internal.executor.TableInsertOrUpdateStatementExecutor) JdbcDialect(org.apache.flink.connector.jdbc.dialect.JdbcDialect)
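
The createRowKeyExtractor helper referenced above is not shown in this excerpt. Since RowData.createFieldGetter and GenericRowData appear in the imports, a plausible sketch is a function that projects the primary-key columns into a new row; the getPrimaryKey helper name is an assumption for illustration:

private static Function<RowData, RowData> createRowKeyExtractor(
        LogicalType[] logicalTypes, int[] pkFields) {
    final RowData.FieldGetter[] fieldGetters = new RowData.FieldGetter[pkFields.length];
    for (int i = 0; i < pkFields.length; i++) {
        // each getter reads column pkFields[i] with the accessor matching its logical type
        fieldGetters[i] = createFieldGetter(logicalTypes[pkFields[i]], pkFields[i]);
    }
    return row -> getPrimaryKey(row, fieldGetters);
}

private static RowData getPrimaryKey(RowData row, RowData.FieldGetter[] fieldGetters) {
    GenericRowData pkRow = new GenericRowData(fieldGetters.length);
    for (int i = 0; i < fieldGetters.length; i++) {
        pkRow.setField(i, fieldGetters[i].getFieldOrNull(row));
    }
    return pkRow;
}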

Aggregations

LogicalType (org.apache.flink.table.types.logical.LogicalType): 192
DataType (org.apache.flink.table.types.DataType): 53
RowType (org.apache.flink.table.types.logical.RowType): 53
RowData (org.apache.flink.table.data.RowData): 45
List (java.util.List): 29
ArrayList (java.util.ArrayList): 28
TableException (org.apache.flink.table.api.TableException): 25
TimestampType (org.apache.flink.table.types.logical.TimestampType): 25
Internal (org.apache.flink.annotation.Internal): 21
IntType (org.apache.flink.table.types.logical.IntType): 21
Map (java.util.Map): 20
ValidationException (org.apache.flink.table.api.ValidationException): 20
ArrayType (org.apache.flink.table.types.logical.ArrayType): 19
DecimalType (org.apache.flink.table.types.logical.DecimalType): 19
LocalZonedTimestampType (org.apache.flink.table.types.logical.LocalZonedTimestampType): 17
Test (org.junit.Test): 17
BigIntType (org.apache.flink.table.types.logical.BigIntType): 16
LegacyTypeInformationType (org.apache.flink.table.types.logical.LegacyTypeInformationType): 16
GenericRowData (org.apache.flink.table.data.GenericRowData): 15
Arrays (java.util.Arrays): 14