
Example 6 with Schema

use of org.apache.flink.table.api.Schema in project flink by apache.

the class SchemaTranslator method createConsumingResult.

/**
 * Converts the given {@link DataType} and an optional declared {@link Schema} (possibly
 * incomplete) into the final {@link ConsumingResult}.
 *
 * <p>This method serves three types of use cases:
 *
 * <ul>
 *   <li>1. Derive physical columns from the input data type.
 *   <li>2. Derive physical columns but merge them with declared computed columns and other
 *       schema information.
 *   <li>3. Derive and enrich physical columns and merge other schema information (only if
 *       {@code mergePhysicalSchema} is set to {@code true}).
 * </ul>
 */
public static ConsumingResult createConsumingResult(DataTypeFactory dataTypeFactory, DataType inputDataType, @Nullable Schema declaredSchema, boolean mergePhysicalSchema) {
    final LogicalType inputType = inputDataType.getLogicalType();
    // we don't allow modifying the number of columns during enrichment, therefore we preserve
    // whether the original type was qualified as a top-level record or not
    final boolean isTopLevelRecord = LogicalTypeChecks.isCompositeType(inputType);
    // the schema will be entirely derived from the input
    if (declaredSchema == null) {
        final Schema.Builder builder = Schema.newBuilder();
        addPhysicalSourceDataTypeFields(builder, inputDataType, null);
        return new ConsumingResult(inputDataType, isTopLevelRecord, builder.build(), null);
    }
    final List<UnresolvedColumn> declaredColumns = declaredSchema.getColumns();
    final UnresolvedPrimaryKey declaredPrimaryKey = declaredSchema.getPrimaryKey().orElse(null);
    // the declared schema contains no physical columns, thus it only enriches the
    // non-physical parts of the derived schema
    if (declaredColumns.stream().noneMatch(SchemaTranslator::isPhysical)) {
        final Schema.Builder builder = Schema.newBuilder();
        addPhysicalSourceDataTypeFields(builder, inputDataType, declaredPrimaryKey);
        builder.fromSchema(declaredSchema);
        return new ConsumingResult(inputDataType, isTopLevelRecord, builder.build(), null);
    }
    if (!mergePhysicalSchema) {
        return new ConsumingResult(inputDataType, isTopLevelRecord, declaredSchema, null);
    }
    // the declared schema enriches the physical data type and the derived schema,
    // it possibly projects the result
    final DataType patchedDataType = patchDataTypeFromDeclaredSchema(dataTypeFactory, inputDataType, declaredColumns);
    final Schema patchedSchema = createPatchedSchema(isTopLevelRecord, patchedDataType, declaredSchema);
    final List<String> projections = extractProjections(patchedSchema, declaredSchema);
    return new ConsumingResult(patchedDataType, isTopLevelRecord, patchedSchema, projections);
}
Also used : UnresolvedPrimaryKey(org.apache.flink.table.api.Schema.UnresolvedPrimaryKey) UnresolvedColumn(org.apache.flink.table.api.Schema.UnresolvedColumn) Schema(org.apache.flink.table.api.Schema) LogicalType(org.apache.flink.table.types.logical.LogicalType) DataType(org.apache.flink.table.types.DataType) AbstractDataType(org.apache.flink.table.types.AbstractDataType)
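
The three use cases listed in the Javadoc above roughly correspond to how a Schema is (or is not) passed when bridging a DataStream into the Table API, which internally goes through createConsumingResult. Below is a minimal sketch of the three call patterns, assuming an existing StreamExecutionEnvironment env and StreamTableEnvironment tableEnv plus a static import of Expressions.$; the field names f0/f1 and their data types are illustrative assumptions, not taken from the Flink sources shown here.

private void consumingResultUseCases(StreamExecutionEnvironment env, StreamTableEnvironment tableEnv) {
    // Illustrative input with two physical fields: f0 BIGINT, f1 STRING.
    final DataStream<Row> stream = env.fromElements(Row.of(1L, "a"), Row.of(2L, "b")).returns(Types.ROW_NAMED(new String[] { "f0", "f1" }, Types.LONG, Types.STRING));
    // Use case 1: no declared schema, all physical columns are derived from the stream's type.
    final Table t1 = tableEnv.fromDataStream(stream);
    // Use case 2: the declared schema holds only non-physical columns (here a computed column),
    // so the physical columns are still derived and merely enriched.
    final Table t2 = tableEnv.fromDataStream(stream, Schema.newBuilder().columnByExpression("upper_f1", $("f1").upperCase()).build());
    // Use case 3: physical columns are declared explicitly and patch the derived data type
    // (this path is only taken when mergePhysicalSchema is enabled for the calling code path).
    final Table t3 = tableEnv.fromDataStream(stream, Schema.newBuilder().column("f0", DataTypes.BIGINT().notNull()).column("f1", DataTypes.STRING()).columnByExpression("upper_f1", $("f1").upperCase()).build());
}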

Example 7 with Schema

use of org.apache.flink.table.api.Schema in project flink by apache.

the class DataStreamJavaITCase method testFromAndToChangelogStreamEventTime.

@Test
public void testFromAndToChangelogStreamEventTime() throws Exception {
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
    final DataStream<Tuple3<Long, Integer, String>> dataStream = getWatermarkedDataStream();
    final DataStream<Row> changelogStream = dataStream.map(t -> Row.ofKind(RowKind.INSERT, t.f1, t.f2)).returns(Types.ROW(Types.INT, Types.STRING));
    // derive physical columns and add a rowtime
    final Table table = tableEnv.fromChangelogStream(changelogStream, Schema.newBuilder().columnByMetadata("rowtime", TIMESTAMP_LTZ(3)).columnByExpression("computed", $("f1").upperCase()).watermark("rowtime", sourceWatermark()).build());
    tableEnv.createTemporaryView("t", table);
    // access and reorder columns
    final Table reordered = tableEnv.sqlQuery("SELECT computed, rowtime, f0 FROM t");
    // write out the rowtime column with fully declared schema
    final DataStream<Row> result = tableEnv.toChangelogStream(reordered, Schema.newBuilder().column("f1", STRING()).columnByMetadata("rowtime", TIMESTAMP_LTZ(3)).columnByExpression("ignored", $("f1").upperCase()).column("f0", INT()).build());
    // test event time window and field access
    testResult(result.keyBy(k -> k.getField("f1")).window(TumblingEventTimeWindows.of(Time.milliseconds(5))).<Row>apply((key, window, input, out) -> {
        int sum = 0;
        for (Row row : input) {
            sum += row.<Integer>getFieldAs("f0");
        }
        out.collect(Row.of(key, sum));
    }).returns(Types.ROW(Types.STRING, Types.INT)), Row.of("A", 47), Row.of("C", 1000), Row.of("C", 1000));
}
Also used : DataType(org.apache.flink.table.types.DataType) BIGINT(org.apache.flink.table.api.DataTypes.BIGINT) STRING(org.apache.flink.table.api.DataTypes.STRING) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Arrays(java.util.Arrays) Schema(org.apache.flink.table.api.Schema) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TableDescriptor(org.apache.flink.table.api.TableDescriptor) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) TIMESTAMP_LTZ(org.apache.flink.table.api.DataTypes.TIMESTAMP_LTZ) RawType(org.apache.flink.table.types.logical.RawType) ZoneOffset(java.time.ZoneOffset) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) FIELD(org.apache.flink.table.api.DataTypes.FIELD) Parameterized(org.junit.runners.Parameterized) AbstractTestBase(org.apache.flink.test.util.AbstractTestBase) DOUBLE(org.apache.flink.table.api.DataTypes.DOUBLE) TableConfig(org.apache.flink.table.api.TableConfig) Expressions.$(org.apache.flink.table.api.Expressions.$) TestValuesTableFactory(org.apache.flink.table.planner.factories.TestValuesTableFactory) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) Table(org.apache.flink.table.api.Table) ResolvedExpressionMock(org.apache.flink.table.expressions.utils.ResolvedExpressionMock) ZoneId(java.time.ZoneId) Objects(java.util.Objects) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) ValueState(org.apache.flink.api.common.state.ValueState) TumblingEventTimeWindows(org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) STRUCTURED(org.apache.flink.table.api.DataTypes.STRUCTURED) TableResult(org.apache.flink.table.api.TableResult) Row(org.apache.flink.types.Row) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) MAP(org.apache.flink.table.api.DataTypes.MAP) BOOLEAN(org.apache.flink.table.api.DataTypes.BOOLEAN) Either(org.apache.flink.types.Either) ChangelogMode(org.apache.flink.table.connector.ChangelogMode) ROW(org.apache.flink.table.api.DataTypes.ROW) Column(org.apache.flink.table.catalog.Column) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) LocalDateTime(java.time.LocalDateTime) Expressions.sourceWatermark(org.apache.flink.table.api.Expressions.sourceWatermark) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) ArrayList(java.util.ArrayList) Collector(org.apache.flink.util.Collector) ProcessFunction(org.apache.flink.streaming.api.functions.ProcessFunction) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) INT(org.apache.flink.table.api.DataTypes.INT) Before(org.junit.Before) Types(org.apache.flink.api.common.typeinfo.Types) Time(org.apache.flink.streaming.api.windowing.time.Time) WatermarkSpec(org.apache.flink.table.catalog.WatermarkSpec) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) Parameter(org.junit.runners.Parameterized.Parameter) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) Configuration(org.apache.flink.configuration.Configuration) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) DataTypes(org.apache.flink.table.api.DataTypes) Test(org.junit.Test) IOException(java.io.IOException) CollectionUtil(org.apache.flink.util.CollectionUtil) DataStream(org.apache.flink.streaming.api.datastream.DataStream) RowKind(org.apache.flink.types.RowKind) DayOfWeek(java.time.DayOfWeek) TIMESTAMP(org.apache.flink.table.api.DataTypes.TIMESTAMP) EnumTypeInfo(org.apache.flink.api.java.typeutils.EnumTypeInfo) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Table(org.apache.flink.table.api.Table) Tuple3(org.apache.flink.api.java.tuple.Tuple3) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) Test(org.junit.Test)
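
The test above relies on a getWatermarkedDataStream() helper that is not part of this excerpt. A minimal sketch of what such a helper could look like, using the test class's StreamExecutionEnvironment field env and assuming event timestamps are carried in field f0 with a monotonous watermark strategy; the element values are placeholders, not the real test data.

private DataStream<Tuple3<Long, Integer, String>> getWatermarkedDataStream() {
    // Placeholder elements of the form (event timestamp, value, key).
    final DataStream<Tuple3<Long, Integer, String>> dataStream = env.fromCollection(Arrays.asList(Tuple3.of(1L, 42, "A"), Tuple3.of(2L, 5, "A"), Tuple3.of(3L, 1000, "C"), Tuple3.of(100L, 1000, "C")), Types.TUPLE(Types.LONG, Types.INT, Types.STRING));
    // Event time is read from f0; watermarks advance monotonously with the timestamps.
    return dataStream.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple3<Long, Integer, String>>forMonotonousTimestamps().withTimestampAssigner((event, timestamp) -> event.f0));
}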

Example 8 with Schema

use of org.apache.flink.table.api.Schema in project flink by apache.

the class AbstractJdbcCatalog method getTable.

// ------ tables and views ------
@Override
public CatalogBaseTable getTable(ObjectPath tablePath) throws TableNotExistException, CatalogException {
    if (!tableExists(tablePath)) {
        throw new TableNotExistException(getName(), tablePath);
    }
    String dbUrl = baseUrl + tablePath.getDatabaseName();
    try (Connection conn = DriverManager.getConnection(dbUrl, username, pwd)) {
        DatabaseMetaData metaData = conn.getMetaData();
        Optional<UniqueConstraint> primaryKey = getPrimaryKey(metaData, getSchemaName(tablePath), getTableName(tablePath));
        PreparedStatement ps = conn.prepareStatement(String.format("SELECT * FROM %s;", getSchemaTableName(tablePath)));
        ResultSetMetaData resultSetMetaData = ps.getMetaData();
        String[] columnNames = new String[resultSetMetaData.getColumnCount()];
        DataType[] types = new DataType[resultSetMetaData.getColumnCount()];
        for (int i = 1; i <= resultSetMetaData.getColumnCount(); i++) {
            columnNames[i - 1] = resultSetMetaData.getColumnName(i);
            types[i - 1] = fromJDBCType(tablePath, resultSetMetaData, i);
            if (resultSetMetaData.isNullable(i) == ResultSetMetaData.columnNoNulls) {
                types[i - 1] = types[i - 1].notNull();
            }
        }
        Schema.Builder schemaBuilder = Schema.newBuilder().fromFields(columnNames, types);
        primaryKey.ifPresent(pk -> schemaBuilder.primaryKeyNamed(pk.getName(), pk.getColumns()));
        Schema tableSchema = schemaBuilder.build();
        Map<String, String> props = new HashMap<>();
        props.put(CONNECTOR.key(), IDENTIFIER);
        props.put(URL.key(), dbUrl);
        props.put(USERNAME.key(), username);
        props.put(PASSWORD.key(), pwd);
        props.put(TABLE_NAME.key(), getSchemaTableName(tablePath));
        return CatalogTable.of(tableSchema, null, Lists.newArrayList(), props);
    } catch (Exception e) {
        throw new CatalogException(String.format("Failed getting table %s", tablePath.getFullName()), e);
    }
}
Also used : HashMap(java.util.HashMap) TableNotExistException(org.apache.flink.table.catalog.exceptions.TableNotExistException) Schema(org.apache.flink.table.api.Schema) Connection(java.sql.Connection) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) UniqueConstraint(org.apache.flink.table.catalog.UniqueConstraint) PreparedStatement(java.sql.PreparedStatement) DatabaseMetaData(java.sql.DatabaseMetaData) UniqueConstraint(org.apache.flink.table.catalog.UniqueConstraint) FunctionAlreadyExistException(org.apache.flink.table.catalog.exceptions.FunctionAlreadyExistException) PartitionNotExistException(org.apache.flink.table.catalog.exceptions.PartitionNotExistException) PartitionSpecInvalidException(org.apache.flink.table.catalog.exceptions.PartitionSpecInvalidException) TablePartitionedException(org.apache.flink.table.catalog.exceptions.TablePartitionedException) FunctionNotExistException(org.apache.flink.table.catalog.exceptions.FunctionNotExistException) DatabaseNotEmptyException(org.apache.flink.table.catalog.exceptions.DatabaseNotEmptyException) DatabaseAlreadyExistException(org.apache.flink.table.catalog.exceptions.DatabaseAlreadyExistException) TableNotPartitionedException(org.apache.flink.table.catalog.exceptions.TableNotPartitionedException) ValidationException(org.apache.flink.table.api.ValidationException) DatabaseNotExistException(org.apache.flink.table.catalog.exceptions.DatabaseNotExistException) PartitionAlreadyExistsException(org.apache.flink.table.catalog.exceptions.PartitionAlreadyExistsException) SQLException(java.sql.SQLException) TableAlreadyExistException(org.apache.flink.table.catalog.exceptions.TableAlreadyExistException) TableNotExistException(org.apache.flink.table.catalog.exceptions.TableNotExistException) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) ResultSetMetaData(java.sql.ResultSetMetaData) DataType(org.apache.flink.table.types.DataType)
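
In practice, getTable is reached through a registered JDBC catalog rather than called directly. A minimal sketch of registering such a catalog from the Table API, assuming flink-connector-jdbc and a PostgreSQL driver are on the classpath; the catalog name, database, credentials, URL, and table name are placeholder assumptions.

TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.newInstance().inStreamingMode().build());
// All option values below are placeholders for a reachable PostgreSQL instance.
tableEnv.executeSql("CREATE CATALOG my_jdbc WITH ('type'='jdbc', 'default-database'='postgres', 'username'='someuser', 'password'='somepassword', 'base-url'='jdbc:postgresql://localhost:5432')");
tableEnv.useCatalog("my_jdbc");
// Reading a table now goes through AbstractJdbcCatalog#getTable, which derives the Schema
// from the JDBC metadata as shown above.
tableEnv.executeSql("SELECT * FROM some_table").print();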

Example 9 with Schema

use of org.apache.flink.table.api.Schema in project flink by apache.

the class HiveCatalogITCase method testViewSchema.

@Test
public void testViewSchema() throws Exception {
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.DEFAULT);
    tableEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tableEnv.useCatalog(hiveCatalog.getName());
    tableEnv.executeSql("create database db1");
    try {
        tableEnv.useDatabase("db1");
        tableEnv.executeSql("create table src(x int,ts timestamp(3)) with ('connector'='datagen','number-of-rows'='10')");
        tableEnv.executeSql("create view v1 as select x,ts from src order by x limit 3");
        CatalogView catalogView = (CatalogView) hiveCatalog.getTable(new ObjectPath("db1", "v1"));
        Schema viewSchema = catalogView.getUnresolvedSchema();
        assertThat(viewSchema).isEqualTo(Schema.newBuilder().fromFields(new String[] { "x", "ts" }, new AbstractDataType[] { DataTypes.INT(), DataTypes.TIMESTAMP(3) }).build());
        List<Row> results = CollectionUtil.iteratorToList(tableEnv.executeSql("select x from v1").collect());
        assertThat(results).hasSize(3);
        tableEnv.executeSql("create view v2 (v2_x,v2_ts) comment 'v2 comment' as select x,cast(ts as timestamp_ltz(3)) from v1");
        catalogView = (CatalogView) hiveCatalog.getTable(new ObjectPath("db1", "v2"));
        assertThat(catalogView.getUnresolvedSchema()).isEqualTo(Schema.newBuilder().fromFields(new String[] { "v2_x", "v2_ts" }, new AbstractDataType[] { DataTypes.INT(), DataTypes.TIMESTAMP_LTZ(3) }).build());
        assertThat(catalogView.getComment()).isEqualTo("v2 comment");
        results = CollectionUtil.iteratorToList(tableEnv.executeSql("select * from v2").collect());
        assertThat(results).hasSize(3);
    } finally {
        tableEnv.executeSql("drop database db1 cascade");
    }
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) Schema(org.apache.flink.table.api.Schema) TableSchema(org.apache.flink.table.api.TableSchema) TableEnvironment(org.apache.flink.table.api.TableEnvironment) Row(org.apache.flink.types.Row) CatalogView(org.apache.flink.table.catalog.CatalogView) Test(org.junit.Test)
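
The assertions above compare unresolved schemas structurally. The unresolved columns of such a Schema can also be inspected directly, for example to list the view's column names; a small sketch reusing the catalogView obtained in the test above.

Schema viewSchema = catalogView.getUnresolvedSchema();
// Unresolved columns expose their names; data types are resolved later against the catalog.
viewSchema.getColumns().forEach(column -> System.out.println(column.getName()));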

Example 10 with Schema

use of org.apache.flink.table.api.Schema in project flink by apache.

the class SchemaResolutionTest method testGeneratedConstraintName.

@Test
public void testGeneratedConstraintName() {
    final Schema schema = Schema.newBuilder().column("a", DataTypes.INT()).column("b", DataTypes.STRING()).column("c", DataTypes.STRING()).primaryKey("b", "a").build();
    assertThat(schema.getPrimaryKey().orElseThrow(IllegalStateException::new).getConstraintName(), equalTo("PK_b_a"));
}
Also used : Schema(org.apache.flink.table.api.Schema) Test(org.junit.Test)
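
The generated name follows the PK_<column>_<column> pattern derived from the declared key column order. If a stable, explicit constraint name is needed instead, it can be declared with primaryKeyNamed, as the JDBC catalog example above does; a small sketch of both variants.

// Generated constraint name: "PK_b_a".
final Schema generated = Schema.newBuilder().column("a", DataTypes.INT()).column("b", DataTypes.STRING()).primaryKey("b", "a").build();
// Explicit constraint name instead of the generated one.
final Schema named = Schema.newBuilder().column("a", DataTypes.INT()).column("b", DataTypes.STRING()).primaryKeyNamed("pk_custom", "b", "a").build();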

Aggregations

Schema (org.apache.flink.table.api.Schema) 14
DataType (org.apache.flink.table.types.DataType) 8
Test (org.junit.Test) 7
Collections (java.util.Collections) 5
List (java.util.List) 5
ArrayList (java.util.ArrayList) 4
Collectors (java.util.stream.Collectors) 4
DataTypes (org.apache.flink.table.api.DataTypes) 4
ValidationException (org.apache.flink.table.api.ValidationException) 4
AbstractDataType (org.apache.flink.table.types.AbstractDataType) 4
Arrays (java.util.Arrays) 3
UnresolvedColumn (org.apache.flink.table.api.Schema.UnresolvedColumn) 3
Row (org.apache.flink.types.Row) 3
ZoneId (java.time.ZoneId) 2
HashMap (java.util.HashMap) 2
Map (java.util.Map) 2
Set (java.util.Set) 2
Nullable (javax.annotation.Nullable) 2
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation) 2
ExplainDetail (org.apache.flink.table.api.ExplainDetail) 2