
Example 21 with DataType

Use of org.apache.flink.table.types.DataType in the Apache Flink project.

From the class CsvRowDataSerDeSchemaTest, method testSerializationWithTypesMismatch:

@Test
public void testSerializationWithTypesMismatch() {
    // f2 is declared as INT, but the row below supplies a String for it,
    // so serialization is expected to fail at that field.
    DataType dataType = ROW(FIELD("f0", STRING()), FIELD("f1", INT()), FIELD("f2", INT()));
    RowType rowType = (RowType) dataType.getLogicalType();
    CsvRowDataSerializationSchema.Builder serSchemaBuilder = new CsvRowDataSerializationSchema.Builder(rowType);
    RowData rowData = rowData("Test", 1, "Test");
    String errorMessage = "Fail to serialize at field: f2.";
    try {
        serialize(serSchemaBuilder, rowData);
        fail("expecting exception message:" + errorMessage);
    } catch (Throwable t) {
        assertThat(t, FlinkMatchers.containsMessage(errorMessage));
    }
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) DataType(org.apache.flink.table.types.DataType) RowType(org.apache.flink.table.types.logical.RowType) StringData.fromString(org.apache.flink.table.data.StringData.fromString) Test(org.junit.Test)
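
For readers new to the DataTypes factory, here is a minimal, self-contained sketch (not taken from the Flink sources; the class name is illustrative) of how a row DataType like the one in the test above is declared and bridged to its RowType. ROW, FIELD, STRING and INT in the test are static imports of these DataTypes factory methods.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;

public class RowDataTypeSketch {
    public static void main(String[] args) {
        // Declare a row type with two named fields.
        DataType dataType = DataTypes.ROW(
                DataTypes.FIELD("f0", DataTypes.STRING()),
                DataTypes.FIELD("f1", DataTypes.INT()));

        // Every DataType wraps a LogicalType; for ROW(...) that is a RowType.
        RowType rowType = (RowType) dataType.getLogicalType();
        System.out.println(rowType.asSummaryString());
    }
}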

Example 22 with DataType

Use of org.apache.flink.table.types.DataType in the Apache Flink project.

From the class ArrowUtils, method collectAsPandasDataFrame:

/**
 * Converts a Flink Table into serialized Arrow record batches, which the Python side
 * assembles into a Pandas DataFrame.
 */
public static CustomIterator<byte[]> collectAsPandasDataFrame(Table table, int maxArrowBatchSize) throws Exception {
    checkArrowUsable();
    BufferAllocator allocator = getRootAllocator().newChildAllocator("collectAsPandasDataFrame", 0, Long.MAX_VALUE);
    RowType rowType = (RowType) table.getResolvedSchema().toSourceRowDataType().getLogicalType();
    DataType defaultRowDataType = TypeConversions.fromLogicalToDataType(rowType);
    VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
    arrowStreamWriter.start();
    Iterator<Row> results = table.execute().collect();
    Iterator<Row> appendOnlyResults;
    if (isAppendOnlyTable(table)) {
        appendOnlyResults = results;
    } else {
        appendOnlyResults = filterOutRetractRows(results);
    }
    ArrowWriter<RowData> arrowWriter = createRowDataArrowWriter(root, rowType);
    // Lazily convert each external Row into Flink's internal RowData as it is consumed.
    Iterator<RowData> convertedResults = new Iterator<RowData>() {

        @Override
        public boolean hasNext() {
            return appendOnlyResults.hasNext();
        }

        @Override
        public RowData next() {
            DataFormatConverters.DataFormatConverter converter = DataFormatConverters.getConverterForDataType(defaultRowDataType);
            return (RowData) converter.toInternal(appendOnlyResults.next());
        }
    };
    return new CustomIterator<byte[]>() {

        @Override
        public boolean hasNext() {
            return convertedResults.hasNext();
        }

        @Override
        public byte[] next() {
            try {
                int i = 0;
                while (convertedResults.hasNext() && i < maxArrowBatchSize) {
                    i++;
                    arrowWriter.write(convertedResults.next());
                }
                arrowWriter.finish();
                arrowStreamWriter.writeBatch();
                return baos.toByteArray();
            } catch (Throwable t) {
                String msg = "Failed to serialize the data of the table";
                LOG.error(msg, t);
                throw new RuntimeException(msg, t);
            } finally {
                arrowWriter.reset();
                baos.reset();
                if (!hasNext()) {
                    root.close();
                    allocator.close();
                }
            }
        }
    };
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) RowType(org.apache.flink.table.types.logical.RowType) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ArrowStreamWriter(org.apache.arrow.vector.ipc.ArrowStreamWriter) BufferAllocator(org.apache.arrow.memory.BufferAllocator) RowData(org.apache.flink.table.data.RowData) DataFormatConverters(org.apache.flink.table.data.util.DataFormatConverters) Iterator(java.util.Iterator) DataType(org.apache.flink.table.types.DataType) Row(org.apache.flink.types.Row)
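
The anonymous iterator above leans on DataFormatConverters to turn external Row objects into internal RowData. Below is a minimal sketch of that conversion step in isolation; the class name and field names are invented for illustration and are not part of ArrowUtils.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.util.DataFormatConverters;
import org.apache.flink.table.types.DataType;
import org.apache.flink.types.Row;

public class RowToRowDataSketch {
    public static void main(String[] args) {
        DataType rowDataType = DataTypes.ROW(
                DataTypes.FIELD("name", DataTypes.STRING()),
                DataTypes.FIELD("age", DataTypes.INT()));

        // Obtain a converter for the whole row type and map the external Row
        // into Flink's internal RowData representation, as the iterator above does.
        DataFormatConverters.DataFormatConverter converter =
                DataFormatConverters.getConverterForDataType(rowDataType);
        RowData internal = (RowData) converter.toInternal(Row.of("Alice", 42));
        System.out.println(internal.getArity());
    }
}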

Example 23 with DataType

Use of org.apache.flink.table.types.DataType in the Apache Flink project.

From the class ElasticsearchDynamicSinkFactoryBase, method getPrimaryKeyLogicalTypesWithIndex:

List<LogicalTypeWithIndex> getPrimaryKeyLogicalTypesWithIndex(Context context) {
    DataType physicalRowDataType = context.getPhysicalRowDataType();
    int[] primaryKeyIndexes = context.getPrimaryKeyIndexes();
    if (primaryKeyIndexes.length != 0) {
        DataType pkDataType = Projection.of(primaryKeyIndexes).project(physicalRowDataType);
        ElasticsearchValidationUtils.validatePrimaryKey(pkDataType);
    }
    ResolvedSchema resolvedSchema = context.getCatalogTable().getResolvedSchema();
    return Arrays.stream(primaryKeyIndexes).mapToObj(index -> {
        Optional<Column> column = resolvedSchema.getColumn(index);
        if (!column.isPresent()) {
            throw new IllegalStateException(String.format("No primary key column found with index '%s'.", index));
        }
        LogicalType logicalType = column.get().getDataType().getLogicalType();
        return new LogicalTypeWithIndex(index, logicalType);
    }).collect(Collectors.toList());
}
Also used : DataType(org.apache.flink.table.types.DataType) EncodingFormat(org.apache.flink.table.connector.format.EncodingFormat) Arrays(java.util.Arrays) BULK_FLUSH_MAX_SIZE_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_MAX_SIZE_OPTION) SerializationFormatFactory(org.apache.flink.table.factories.SerializationFormatFactory) HOSTS_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.HOSTS_OPTION) INDEX_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.INDEX_OPTION) Column(org.apache.flink.table.catalog.Column) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) USERNAME_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.USERNAME_OPTION) Supplier(java.util.function.Supplier) TableConfigOptions(org.apache.flink.table.api.config.TableConfigOptions) ReadableConfig(org.apache.flink.configuration.ReadableConfig) BULK_FLUSH_MAX_ACTIONS_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_MAX_ACTIONS_OPTION) SOCKET_TIMEOUT(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.SOCKET_TIMEOUT) ConfigOption(org.apache.flink.configuration.ConfigOption) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) BULK_FLUSH_BACKOFF_TYPE_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_BACKOFF_TYPE_OPTION) PASSWORD_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.PASSWORD_OPTION) Nullable(javax.annotation.Nullable) Projection(org.apache.flink.table.connector.Projection) RowData(org.apache.flink.table.data.RowData) DynamicTableSink(org.apache.flink.table.connector.sink.DynamicTableSink) CONNECTION_REQUEST_TIMEOUT(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.CONNECTION_REQUEST_TIMEOUT) KEY_DELIMITER_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.KEY_DELIMITER_OPTION) Strings.capitalize(org.elasticsearch.common.Strings.capitalize) DynamicTableSinkFactory(org.apache.flink.table.factories.DynamicTableSinkFactory) BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION) Set(java.util.Set) SINK_PARALLELISM(org.apache.flink.table.factories.FactoryUtil.SINK_PARALLELISM) StringUtils(org.apache.flink.util.StringUtils) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) CONNECTION_TIMEOUT(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.CONNECTION_TIMEOUT) BULK_FLUSH_INTERVAL_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_INTERVAL_OPTION) List(java.util.List) Stream(java.util.stream.Stream) DELIVERY_GUARANTEE_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.DELIVERY_GUARANTEE_OPTION) FactoryUtil(org.apache.flink.table.factories.FactoryUtil) LogicalType(org.apache.flink.table.types.logical.LogicalType) FORMAT_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.FORMAT_OPTION) ValidationException(org.apache.flink.table.api.ValidationException) BULK_FLUSH_BACKOFF_DELAY_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_BACKOFF_DELAY_OPTION) Optional(java.util.Optional) Internal(org.apache.flink.annotation.Internal) 
SerializationSchema(org.apache.flink.api.common.serialization.SerializationSchema) CONNECTION_PATH_PREFIX_OPTION(org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.CONNECTION_PATH_PREFIX_OPTION)
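
Projection.of(primaryKeyIndexes).project(...) is what narrows the physical row type down to the key columns. Here is a small standalone sketch of that same call; the schema and class name below are invented for illustration.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.connector.Projection;
import org.apache.flink.table.types.DataType;

public class PrimaryKeyProjectionSketch {
    public static void main(String[] args) {
        DataType physicalRowDataType = DataTypes.ROW(
                DataTypes.FIELD("id", DataTypes.BIGINT()),
                DataTypes.FIELD("name", DataTypes.STRING()),
                DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)));

        // Keep only field 0 ("id"); the result is again a row DataType
        // containing just the projected columns.
        int[] primaryKeyIndexes = {0};
        DataType pkDataType = Projection.of(primaryKeyIndexes).project(physicalRowDataType);
        System.out.println(pkDataType);
    }
}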

Example 24 with DataType

Use of org.apache.flink.table.types.DataType in the Apache Flink project.

From the class HivePartitionFetcherTest, method testIgnoreNonExistPartition:

@Test
public void testIgnoreNonExistPartition() throws Exception {
    // it's possible a partition path exists but the partition is not added to HMS, e.g. the
    // partition is still being loaded, or the path is simply misplaced
    // make sure the fetch can ignore such paths
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create test table
    String[] fieldNames = new String[] { "i", "date" };
    DataType[] fieldTypes = new DataType[] { DataTypes.INT(), DataTypes.STRING() };
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // add a valid partition path
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) Path(org.apache.hadoop.fs.Path) HiveCatalog(org.apache.flink.table.catalog.hive.HiveCatalog) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Table(org.apache.hadoop.hive.metastore.api.Table) JobConfWrapper(org.apache.flink.connectors.hive.JobConfWrapper) TableSchema(org.apache.flink.table.api.TableSchema) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) CatalogTable(org.apache.flink.table.catalog.CatalogTable) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) FileSystem(org.apache.hadoop.fs.FileSystem) DataType(org.apache.flink.table.types.DataType) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)
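
The test builds its schema from parallel name and type arrays; below is a minimal sketch of that pattern on its own (the wrapper class is illustrative). TableSchema is the legacy schema API that the Hive connector code still uses.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.types.DataType;

public class HiveTestSchemaSketch {
    public static void main(String[] args) {
        // Parallel arrays: one name and one DataType per column.
        String[] fieldNames = new String[] {"i", "date"};
        DataType[] fieldTypes = new DataType[] {DataTypes.INT(), DataTypes.STRING()};

        TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
        System.out.println(schema);
    }
}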

Example 25 with DataType

Use of org.apache.flink.table.types.DataType in the Apache Flink project.

From the class HiveCatalogDataTypeTest, method testNonSupportedBinaryDataTypes:

@Test
public void testNonSupportedBinaryDataTypes() throws Exception {
    // The Hive catalog does not support fixed-length BINARY columns,
    // so creating a table with this type must throw.
    DataType[] types = new DataType[] { DataTypes.BINARY(BinaryType.MAX_LENGTH) };
    CatalogTable table = createCatalogTable(types);
    catalog.createDatabase(db1, createDb(), false);
    exception.expect(UnsupportedOperationException.class);
    catalog.createTable(path1, table, false);
}
Also used : DataType(org.apache.flink.table.types.DataType) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Test(org.junit.Test)
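
DataTypes.BINARY(BinaryType.MAX_LENGTH) declares the widest fixed-length binary type, which the Hive catalog rejects above. A short sketch of inspecting such a type (illustrative only, not part of the test):

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.BinaryType;

public class BinaryDataTypeSketch {
    public static void main(String[] args) {
        DataType binary = DataTypes.BINARY(BinaryType.MAX_LENGTH);

        // The wrapped logical type carries the declared fixed length.
        BinaryType logicalType = (BinaryType) binary.getLogicalType();
        System.out.println(logicalType.getLength() == BinaryType.MAX_LENGTH);
    }
}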

Aggregations

DataType (org.apache.flink.table.types.DataType): 260 uses
Test (org.junit.Test): 72 uses
RowType (org.apache.flink.table.types.logical.RowType): 59 uses
LogicalType (org.apache.flink.table.types.logical.LogicalType): 58 uses
RowData (org.apache.flink.table.data.RowData): 54 uses
List (java.util.List): 38 uses
FieldsDataType (org.apache.flink.table.types.FieldsDataType): 32 uses
ValidationException (org.apache.flink.table.api.ValidationException): 31 uses
ArrayList (java.util.ArrayList): 29 uses
Collectors (java.util.stream.Collectors): 24 uses
AtomicDataType (org.apache.flink.table.types.AtomicDataType): 24 uses
Map (java.util.Map): 23 uses
Internal (org.apache.flink.annotation.Internal): 23 uses
TableException (org.apache.flink.table.api.TableException): 23 uses
HashMap (java.util.HashMap): 22 uses
GenericRowData (org.apache.flink.table.data.GenericRowData): 22 uses
Row (org.apache.flink.types.Row): 22 uses
TableSchema (org.apache.flink.table.api.TableSchema): 20 uses
TypeConversions.fromLogicalToDataType (org.apache.flink.table.types.utils.TypeConversions.fromLogicalToDataType): 19 uses
ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema): 18 uses