Example 26 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class CsvRowDataSerDeSchemaTest method testSerializationWithTypesMismatch.

@Test
public void testSerializationWithTypesMismatch() {
    DataType dataType = ROW(FIELD("f0", STRING()), FIELD("f1", INT()), FIELD("f2", INT()));
    RowType rowType = (RowType) dataType.getLogicalType();
    CsvRowDataSerializationSchema.Builder serSchemaBuilder = new CsvRowDataSerializationSchema.Builder(rowType);
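    // f2 is declared as INT, but the value supplied below is a String, so serialization must fail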
    RowData rowData = rowData("Test", 1, "Test");
    String errorMessage = "Fail to serialize at field: f2.";
    try {
        serialize(serSchemaBuilder, rowData);
        fail("expecting exception message:" + errorMessage);
    } catch (Throwable t) {
        assertThat(t, FlinkMatchers.containsMessage(errorMessage));
    }
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) DataType(org.apache.flink.table.types.DataType) RowType(org.apache.flink.table.types.logical.RowType) StringData.fromString(org.apache.flink.table.data.StringData.fromString) Test(org.junit.Test)
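
The rowData(...) and serialize(...) helpers are private to the test class and not shown by the aggregator. A plausible sketch of the row-building helper, assuming GenericRowData and StringData (the actual implementation may differ):

private static RowData rowData(String f0, int f1, String f2) {
    // GenericRowData.of(...) stores the values as-is; passing StringData where
    // the schema expects INT is what triggers the type mismatch above
    return GenericRowData.of(StringData.fromString(f0), f1, StringData.fromString(f2));
}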

Example 27 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class CsvRowDataSerDeSchemaTest method testSerDeConsistency.

private void testSerDeConsistency(RowData originalRow, CsvRowDataSerializationSchema.Builder serSchemaBuilder, CsvRowDataDeserializationSchema.Builder deserSchemaBuilder) throws Exception {
    RowData deserializedRow = deserialize(deserSchemaBuilder, new String(serialize(serSchemaBuilder, originalRow)));
    assertEquals(originalRow, deserializedRow);
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) StringData.fromString(org.apache.flink.table.data.StringData.fromString)
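
A hypothetical caller of this helper, assuming a two-field schema; the deserialization builder's constructor is assumed to take the row type plus a TypeInformation such as InternalTypeInfo.of(rowType) (org.apache.flink.table.runtime.typeutils.InternalTypeInfo):

DataType dataType = ROW(FIELD("f0", STRING()), FIELD("f1", INT()));
RowType rowType = (RowType) dataType.getLogicalType();
RowData row = GenericRowData.of(StringData.fromString("hello"), 42);
// round-trip: serialize to CSV text, deserialize back, and compare
testSerDeConsistency(
        row,
        new CsvRowDataSerializationSchema.Builder(rowType),
        new CsvRowDataDeserializationSchema.Builder(rowType, InternalTypeInfo.of(rowType)));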

Example 28 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class ArrowUtils method collectAsPandasDataFrame.

/**
 * Convert Flink table to Pandas DataFrame.
 */
public static CustomIterator<byte[]> collectAsPandasDataFrame(Table table, int maxArrowBatchSize) throws Exception {
    checkArrowUsable();
    BufferAllocator allocator = getRootAllocator().newChildAllocator("collectAsPandasDataFrame", 0, Long.MAX_VALUE);
    RowType rowType = (RowType) table.getResolvedSchema().toSourceRowDataType().getLogicalType();
    DataType defaultRowDataType = TypeConversions.fromLogicalToDataType(rowType);
    VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
    arrowStreamWriter.start();
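    // collect the table's rows; changelog (retract/upsert) results are reduced to append-only rows below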
    Iterator<Row> results = table.execute().collect();
    Iterator<Row> appendOnlyResults;
    if (isAppendOnlyTable(table)) {
        appendOnlyResults = results;
    } else {
        appendOnlyResults = filterOutRetractRows(results);
    }
    ArrowWriter<RowData> arrowWriter = createRowDataArrowWriter(root, rowType);
    Iterator<RowData> convertedResults = new Iterator<RowData>() {

        @Override
        public boolean hasNext() {
            return appendOnlyResults.hasNext();
        }

        @Override
        public RowData next() {
            DataFormatConverters.DataFormatConverter converter = DataFormatConverters.getConverterForDataType(defaultRowDataType);
            return (RowData) converter.toInternal(appendOnlyResults.next());
        }
    };
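    // each next() call below drains up to maxArrowBatchSize rows into one Arrow IPC batch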
    return new CustomIterator<byte[]>() {

        @Override
        public boolean hasNext() {
            return convertedResults.hasNext();
        }

        @Override
        public byte[] next() {
            try {
                int i = 0;
                while (convertedResults.hasNext() && i < maxArrowBatchSize) {
                    i++;
                    arrowWriter.write(convertedResults.next());
                }
                arrowWriter.finish();
                arrowStreamWriter.writeBatch();
                return baos.toByteArray();
            } catch (Throwable t) {
                String msg = "Failed to serialize the data of the table";
                LOG.error(msg, t);
                throw new RuntimeException(msg, t);
            } finally {
                arrowWriter.reset();
                baos.reset();
                if (!hasNext()) {
                    root.close();
                    allocator.close();
                }
            }
        }
    };
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) RowType(org.apache.flink.table.types.logical.RowType) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ArrowStreamWriter(org.apache.arrow.vector.ipc.ArrowStreamWriter) BufferAllocator(org.apache.arrow.memory.BufferAllocator) RowData(org.apache.flink.table.data.RowData) DataFormatConverters(org.apache.flink.table.data.util.DataFormatConverters) Iterator(java.util.Iterator) DataType(org.apache.flink.table.types.DataType) Row(org.apache.flink.types.Row)
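
The returned iterator produces one serialized Arrow batch per next() call. A minimal consumption sketch (variable names are illustrative; the real consumer is the PyFlink side, which reassembles the batches into a pandas DataFrame):

CustomIterator<byte[]> batches = ArrowUtils.collectAsPandasDataFrame(table, 10000);
while (batches.hasNext()) {
    byte[] ipcBytes = batches.next();
    // ship the IPC bytes to the consumer; resources are released automatically
    // once the iterator is exhausted (see the finally block above)
}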

Example 29 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class ElasticsearchDynamicSink method getSinkRuntimeProvider.

@Override
public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
    SerializationSchema<RowData> format = this.format.createRuntimeEncoder(context, physicalRowDataType);
    final RowElasticsearchEmitter rowElasticsearchEmitter = new RowElasticsearchEmitter(createIndexGenerator(), format, XContentType.JSON, documentType, createKeyExtractor());
    ElasticsearchSinkBuilderBase<RowData, ? extends ElasticsearchSinkBuilderBase> builder = builderSupplier.get();
    builder.setEmitter(rowElasticsearchEmitter);
    builder.setHosts(config.getHosts().toArray(new HttpHost[0]));
    builder.setDeliveryGuarantee(config.getDeliveryGuarantee());
    builder.setBulkFlushMaxActions(config.getBulkFlushMaxActions());
    builder.setBulkFlushMaxSizeMb(config.getBulkFlushMaxByteSize().getMebiBytes());
    builder.setBulkFlushInterval(config.getBulkFlushInterval());
    if (config.getBulkFlushBackoffType().isPresent()) {
        FlushBackoffType backoffType = config.getBulkFlushBackoffType().get();
        int backoffMaxRetries = config.getBulkFlushBackoffRetries().get();
        long backoffDelayMs = config.getBulkFlushBackoffDelay().get();
        builder.setBulkFlushBackoffStrategy(backoffType, backoffMaxRetries, backoffDelayMs);
    }
    if (config.getUsername().isPresent() && !StringUtils.isNullOrWhitespaceOnly(config.getUsername().get())) {
        builder.setConnectionUsername(config.getUsername().get());
    }
    if (config.getPassword().isPresent() && !StringUtils.isNullOrWhitespaceOnly(config.getPassword().get())) {
        builder.setConnectionPassword(config.getPassword().get());
    }
    if (config.getPathPrefix().isPresent() && !StringUtils.isNullOrWhitespaceOnly(config.getPathPrefix().get())) {
        builder.setConnectionPathPrefix(config.getPathPrefix().get());
    }
    if (config.getConnectionRequestTimeout().isPresent()) {
        builder.setConnectionRequestTimeout((int) config.getConnectionRequestTimeout().get().getSeconds());
    }
    if (config.getConnectionTimeout().isPresent()) {
        builder.setConnectionTimeout((int) config.getConnectionTimeout().get().getSeconds());
    }
    if (config.getSocketTimeout().isPresent()) {
        builder.setSocketTimeout((int) config.getSocketTimeout().get().getSeconds());
    }
    return SinkV2Provider.of(builder.build(), config.getParallelism().orElse(null));
}
Also used : RowData(org.apache.flink.table.data.RowData) FlushBackoffType(org.apache.flink.connector.elasticsearch.sink.FlushBackoffType) HttpHost(org.apache.http.HttpHost)
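
The long run of Optional guards above follows one pattern: read an optional config value, convert it, and apply it to the builder only if present. A possible refactor sketch of a single guard (behavior unchanged; shown for the socket timeout only):

config.getSocketTimeout()
        .map(timeout -> (int) timeout.getSeconds())
        .ifPresent(builder::setSocketTimeout);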

Example 30 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class HiveLookupJoinITCase method testPartitionFetcherAndReader.

@Test
public void testPartitionFetcherAndReader() throws Exception {
    // constructs test data using dynamic partition
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql("insert overwrite partition_table values " + "(1,'a',08,2019,'08','01')," + "(1,'a',10,2020,'08','31')," + "(2,'a',21,2020,'08','31')," + "(2,'b',22,2020,'08','31')," + "(3,'c',33,2020,'09','31')").await();
    FileSystemLookupFunction<HiveTablePartition> lookupFunction = getLookupFunction("partition_table");
    lookupFunction.open(null);
    PartitionFetcher<HiveTablePartition> fetcher = lookupFunction.getPartitionFetcher();
    PartitionFetcher.Context<HiveTablePartition> context = lookupFunction.getFetcherContext();
    List<HiveTablePartition> partitions = fetcher.fetch(context);
    // fetch latest partition by partition-name
    assertEquals(1, partitions.size());
    PartitionReader<HiveTablePartition, RowData> reader = lookupFunction.getPartitionReader();
    reader.open(partitions);
    List<RowData> res = new ArrayList<>();
    ObjectIdentifier tableIdentifier = ObjectIdentifier.of(hiveCatalog.getName(), "default", "partition_table");
    CatalogTable catalogTable = (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
    GenericRowData reuse = new GenericRowData(catalogTable.getSchema().getFieldCount());
    TypeSerializer<RowData> serializer = InternalSerializers.create(catalogTable.getSchema().toRowDataType().getLogicalType());
    RowData row;
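    // reader.read(reuse) may return the reuse instance itself, so copy each row before storing it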
    while ((row = reader.read(reuse)) != null) {
        res.add(serializer.copy(row));
    }
    res.sort(Comparator.comparingInt(o -> o.getInt(0)));
    assertEquals("[+I(3,c,33,2020,09,31)]", res.toString());
}
Also used : PartitionReader(org.apache.flink.connector.file.table.PartitionReader) PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN) Arrays(java.util.Arrays) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) BeforeClass(org.junit.BeforeClass) PARTITION_TIME_EXTRACTOR_KIND(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_KIND) CatalogTable(org.apache.flink.table.catalog.CatalogTable) STREAMING_SOURCE_PARTITION_INCLUDE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_INCLUDE) HiveCatalog(org.apache.flink.table.catalog.hive.HiveCatalog) ArrayList(java.util.ArrayList) GenericRowData(org.apache.flink.table.data.GenericRowData) InternalSerializers(org.apache.flink.table.runtime.typeutils.InternalSerializers) Duration(java.time.Duration) DynamicTableSourceFactory(org.apache.flink.table.factories.DynamicTableSourceFactory) STREAMING_SOURCE_PARTITION_ORDER(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_ORDER) TableEnvironment(org.apache.flink.table.api.TableEnvironment) AfterClass(org.junit.AfterClass) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) RowData(org.apache.flink.table.data.RowData) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) TestValuesTableFactory(org.apache.flink.table.planner.factories.TestValuesTableFactory) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) TableImpl(org.apache.flink.table.api.internal.TableImpl) TestCollectionTableFactory(org.apache.flink.table.planner.factories.utils.TestCollectionTableFactory) HiveTestUtils(org.apache.flink.table.catalog.hive.HiveTestUtils) STREAMING_SOURCE_ENABLE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_ENABLE) List(java.util.List) FactoryUtil(org.apache.flink.table.factories.FactoryUtil) SqlDialect(org.apache.flink.table.api.SqlDialect) EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) Row(org.apache.flink.types.Row) Comparator(java.util.Comparator) STREAMING_SOURCE_MONITOR_INTERVAL(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_MONITOR_INTERVAL) TableEnvironmentInternal(org.apache.flink.table.api.internal.TableEnvironmentInternal) Assert.assertEquals(org.junit.Assert.assertEquals)
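
The imports above reference the streaming-source options that make the lookup fetch only the latest partition. A plausible sketch of the DDL this test's partition_table could carry (column names are hypothetical; the real test creates the table elsewhere):

batchEnv.executeSql(
        "create table partition_table (x int, y string, z int)"
                + " partitioned by (pt_year int, pt_mon string, pt_day string)"
                + " tblproperties ("
                + " 'streaming-source.enable' = 'true',"
                + " 'streaming-source.partition.include' = 'latest',"
                + " 'streaming-source.monitor-interval' = '1 h',"
                + " 'streaming-source.partition-order' = 'partition-name')");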

Aggregations

RowData (org.apache.flink.table.data.RowData) 602
Test (org.junit.Test) 201
GenericRowData (org.apache.flink.table.data.GenericRowData) 178
ArrayList (java.util.ArrayList) 109
RowType (org.apache.flink.table.types.logical.RowType) 105
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData) 90
Watermark (org.apache.flink.streaming.api.watermark.Watermark) 84
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue) 72
Transformation (org.apache.flink.api.dag.Transformation) 70
Configuration (org.apache.flink.configuration.Configuration) 68
BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData) 67
List (java.util.List) 65
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) 54
DataType (org.apache.flink.table.types.DataType) 52
Map (java.util.Map) 42
LogicalType (org.apache.flink.table.types.logical.LogicalType) 41
TableException (org.apache.flink.table.api.TableException) 34
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation) 33
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector) 32
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState) 31