Search in sources :

Example 46 with TableSchema

use of org.apache.flink.table.api.TableSchema in project flink by apache.

the class HiveCatalogGenericMetadataTest method testTableSchemaCompatibility.

/**
 * Registers four Hive metastore tables whose schema is described purely through
 * {@code flink.generic.table.schema.*} table parameters (numeric, character, date/time and
 * complex/misc column types, including computed columns and watermarks) and asserts that the
 * catalog reads each of them back as the expected {@link TableSchema}.
 *
 * <p>NOTE: Be careful to modify this test, it is important to backward compatibility.
 *
 * @throws Exception if any catalog or metastore call fails
 */
@Test
public void testTableSchemaCompatibility() throws Exception {
    catalog.createDatabase(db1, createDb(), false);
    try {
        // table with numeric types
        ObjectPath tablePath = new ObjectPath(db1, "generic1");
        Table hiveTable = newLegacyGenericHiveTable(tablePath);
        putLegacyColumn(hiveTable, 0, "ti", "TINYINT");
        putLegacyColumn(hiveTable, 1, "si", "SMALLINT");
        putLegacyColumn(hiveTable, 2, "i", "INT");
        putLegacyColumn(hiveTable, 3, "bi", "BIGINT");
        putLegacyColumn(hiveTable, 4, "f", "FLOAT");
        putLegacyColumn(hiveTable, 5, "d", "DOUBLE");
        putLegacyColumn(hiveTable, 6, "de", "DECIMAL(10, 5)");
        putLegacyComputedColumn(hiveTable, 7, "cost", "`d` * `bi`", "DOUBLE");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        CatalogBaseTable catalogBaseTable = catalog.getTable(tablePath);
        // Only checked for the first table, exactly as before.
        assertFalse(HiveCatalog.isHiveTable(catalogBaseTable.getOptions()));
        TableSchema expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] { "ti", "si", "i", "bi", "f", "d", "de" },
                                new DataType[] {
                                    DataTypes.TINYINT(),
                                    DataTypes.SMALLINT(),
                                    DataTypes.INT(),
                                    DataTypes.BIGINT(),
                                    DataTypes.FLOAT(),
                                    DataTypes.DOUBLE(),
                                    DataTypes.DECIMAL(10, 5)
                                })
                        .field("cost", DataTypes.DOUBLE(), "`d` * `bi`")
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());

        // table with character types
        tablePath = new ObjectPath(db1, "generic2");
        hiveTable = newLegacyGenericHiveTable(tablePath);
        putLegacyColumn(hiveTable, 0, "c", "CHAR(265)");
        putLegacyColumn(hiveTable, 1, "vc", "VARCHAR(65536)");
        putLegacyColumn(hiveTable, 2, "s", "VARCHAR(2147483647)");
        putLegacyColumn(hiveTable, 3, "b", "BINARY(1)");
        putLegacyColumn(hiveTable, 4, "vb", "VARBINARY(255)");
        putLegacyColumn(hiveTable, 5, "bs", "VARBINARY(2147483647)");
        putLegacyComputedColumn(hiveTable, 6, "len", "CHAR_LENGTH(`s`)", "INT");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        catalogBaseTable = catalog.getTable(tablePath);
        expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] { "c", "vc", "s", "b", "vb", "bs" },
                                new DataType[] {
                                    DataTypes.CHAR(265),
                                    DataTypes.VARCHAR(65536),
                                    DataTypes.STRING(),
                                    DataTypes.BINARY(1),
                                    DataTypes.VARBINARY(255),
                                    DataTypes.BYTES()
                                })
                        .field("len", DataTypes.INT(), "CHAR_LENGTH(`s`)")
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());

        // table with date/time types
        tablePath = new ObjectPath(db1, "generic3");
        hiveTable = newLegacyGenericHiveTable(tablePath);
        putLegacyColumn(hiveTable, 0, "dt", "DATE");
        putLegacyColumn(hiveTable, 1, "t", "TIME(0)");
        putLegacyColumn(hiveTable, 2, "ts", "TIMESTAMP(3)");
        putLegacyColumn(hiveTable, 3, "tstz", "TIMESTAMP(6) WITH LOCAL TIME ZONE");
        putLegacyWatermark(hiveTable, "ts", "TIMESTAMP(3)", "ts");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        catalogBaseTable = catalog.getTable(tablePath);
        expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] { "dt", "t", "ts", "tstz" },
                                new DataType[] {
                                    DataTypes.DATE(),
                                    DataTypes.TIME(),
                                    DataTypes.TIMESTAMP(3),
                                    DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE()
                                })
                        .watermark("ts", "ts", DataTypes.TIMESTAMP(3))
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());

        // table with complex/misc types
        tablePath = new ObjectPath(db1, "generic4");
        hiveTable = newLegacyGenericHiveTable(tablePath);
        putLegacyColumn(hiveTable, 0, "a", "ARRAY<INT>");
        putLegacyColumn(hiveTable, 1, "m", "MAP<BIGINT, TIMESTAMP(6)>");
        putLegacyColumn(hiveTable, 2, "mul", "MULTISET<DOUBLE>");
        putLegacyColumn(hiveTable, 3, "r", "ROW<`f1` INT, `f2` VARCHAR(2147483647)>");
        putLegacyColumn(hiveTable, 4, "b", "BOOLEAN");
        putLegacyColumn(hiveTable, 5, "ts", "TIMESTAMP(3)");
        putLegacyWatermark(hiveTable, "ts", "TIMESTAMP(3)", "`ts` - INTERVAL '5' SECOND");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        catalogBaseTable = catalog.getTable(tablePath);
        expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] { "a", "m", "mul", "r", "b", "ts" },
                                new DataType[] {
                                    DataTypes.ARRAY(DataTypes.INT()),
                                    DataTypes.MAP(DataTypes.BIGINT(), DataTypes.TIMESTAMP()),
                                    DataTypes.MULTISET(DataTypes.DOUBLE()),
                                    DataTypes.ROW(
                                            DataTypes.FIELD("f1", DataTypes.INT()),
                                            DataTypes.FIELD("f2", DataTypes.STRING())),
                                    DataTypes.BOOLEAN(),
                                    DataTypes.TIMESTAMP(3)
                                })
                        .watermark("ts", "`ts` - INTERVAL '5' SECOND", DataTypes.TIMESTAMP(3))
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());
    } finally {
        // Drop the database so nothing leaks into other tests, even when an assertion fails.
        catalog.dropDatabase(db1, true, true);
    }
}

/**
 * Creates an empty Hive table for {@code tablePath}, sets its database and table name, and
 * passes its parameter map to {@code setLegacyGeneric}.
 */
private Table newLegacyGenericHiveTable(ObjectPath tablePath) {
    Table hiveTable =
            org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(
                    tablePath.getDatabaseName(), tablePath.getObjectName());
    hiveTable.setDbName(tablePath.getDatabaseName());
    hiveTable.setTableName(tablePath.getObjectName());
    setLegacyGeneric(hiveTable.getParameters());
    return hiveTable;
}

/** Stores the {@code name} and {@code data-type} parameters of the column at {@code index}. */
private void putLegacyColumn(Table hiveTable, int index, String name, String dataType) {
    hiveTable.getParameters().put("flink.generic.table.schema." + index + ".name", name);
    hiveTable.getParameters().put("flink.generic.table.schema." + index + ".data-type", dataType);
}

/** Stores the {@code name}, {@code expr} and {@code data-type} parameters of a computed column. */
private void putLegacyComputedColumn(
        Table hiveTable, int index, String name, String expression, String dataType) {
    hiveTable.getParameters().put("flink.generic.table.schema." + index + ".name", name);
    hiveTable.getParameters().put("flink.generic.table.schema." + index + ".expr", expression);
    hiveTable.getParameters().put("flink.generic.table.schema." + index + ".data-type", dataType);
}

/** Stores the parameters of the single watermark (index 0) declared on the table. */
private void putLegacyWatermark(
        Table hiveTable, String rowtime, String strategyDataType, String strategyExpression) {
    hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.rowtime", rowtime);
    hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.data-type", strategyDataType);
    hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.expr", strategyExpression);
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) CatalogTable(org.apache.flink.table.catalog.CatalogTable) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) Table(org.apache.hadoop.hive.metastore.api.Table) TableSchema(org.apache.flink.table.api.TableSchema) DataType(org.apache.flink.table.types.DataType) Test(org.junit.Test)

Example 47 with TableSchema

use of org.apache.flink.table.api.TableSchema in project flink by apache.

the class HiveCatalogITCase method testCsvTableViaAPI.

/**
 * Registers a CSV-backed filesystem source and sink table in the Hive catalog, checks the rows
 * returned by a {@code select *} on the source, then inserts the source into the sink and
 * verifies the lines written to the sink file.
 *
 * @throws Exception if catalog access, job execution or file I/O fails
 */
@Test
public void testCsvTableViaAPI() throws Exception {
    TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
    tableEnv.getConfig().addConfiguration(new Configuration().set(CoreOptions.DEFAULT_PARALLELISM, 1));
    tableEnv.registerCatalog("myhive", hiveCatalog);
    tableEnv.useCatalog("myhive");
    final TableSchema schema = TableSchema.builder().field("name", DataTypes.STRING()).field("age", DataTypes.INT()).build();
    final Map<String, String> sourceOptions = new HashMap<>();
    sourceOptions.put("connector.type", "filesystem");
    sourceOptions.put("connector.path", getClass().getResource("/csv/test.csv").getPath());
    sourceOptions.put("format.type", "csv");
    CatalogTable source = new CatalogTableImpl(schema, sourceOptions, "Comment.");
    Path p = Paths.get(tempFolder.newFolder().getAbsolutePath(), "test.csv");
    final Map<String, String> sinkOptions = new HashMap<>();
    sinkOptions.put("connector.type", "filesystem");
    sinkOptions.put("connector.path", p.toAbsolutePath().toString());
    sinkOptions.put("format.type", "csv");
    CatalogTable sink = new CatalogTableImpl(schema, sinkOptions, "Comment.");
    hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sourceTableName), source, false);
    hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName), sink, false);
    Table t = tableEnv.sqlQuery(String.format("select * from myhive.`default`.%s", sourceTableName));
    List<Row> result = CollectionUtil.iteratorToList(t.execute().collect());
    // Sort by string form so the comparison below does not depend on read order.
    result.sort(Comparator.comparing(String::valueOf));
    // assert query result
    assertThat(result).containsExactly(Row.of("1", 1), Row.of("2", 2), Row.of("3", 3));
    tableEnv.executeSql(String.format("insert into myhive.`default`.%s select * from myhive.`default`.%s", sinkTableName, sourceTableName)).await();
    // assert written result
    File resultFile = new File(p.toAbsolutePath().toString());
    // try-with-resources: the reader is closed even if one of the assertions below fails.
    try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
        for (int i = 0; i < 3; i++) {
            String readLine = reader.readLine();
            assertThat(readLine).isEqualTo(String.format("%d,%d", i + 1, i + 1));
        }
        // No more line
        assertThat(reader.readLine()).isNull();
    }
    tableEnv.executeSql(String.format("DROP TABLE %s", sourceTableName));
    tableEnv.executeSql(String.format("DROP TABLE %s", sinkTableName));
}
Also used : Path(java.nio.file.Path) ObjectPath(org.apache.flink.table.catalog.ObjectPath) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Table(org.apache.flink.table.api.Table) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) Configuration(org.apache.flink.configuration.Configuration) TableSchema(org.apache.flink.table.api.TableSchema) HashMap(java.util.HashMap) TableEnvironment(org.apache.flink.table.api.TableEnvironment) CatalogTable(org.apache.flink.table.catalog.CatalogTable) UniqueConstraint(org.apache.flink.table.api.constraints.UniqueConstraint) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) Row(org.apache.flink.types.Row) File(java.io.File) Test(org.junit.Test)

Example 48 with TableSchema

use of org.apache.flink.table.api.TableSchema in project flink by apache.

the class TableEnvHiveConnectorITCase method testPKConstraint.

/**
 * Checks how primary-key constraints declared in Hive DDL surface in the {@link TableSchema}
 * returned by the Hive catalog: a RELY key is exposed, while a NORELY key and a table without
 * any key expose none.
 */
@Test
public void testPKConstraint() throws Exception {
    // While PK constraints are supported since Hive 2.1.0, the constraints cannot be RELY in
    // 2.x versions.
    // So let's only test for 3.x.
    Assume.assumeTrue(HiveVersionTestUtil.HIVE_310_OR_LATER);
    final TableEnvironment env = getTableEnvWithHiveCatalog();
    env.executeSql("create database db1");
    try {
        // test rely PK constraints
        env.executeSql("create table db1.tbl1 (x tinyint,y smallint,z int, primary key (x,z) disable novalidate rely)");
        final TableSchema relySchema =
                hiveCatalog.getTable(new ObjectPath("db1", "tbl1")).getSchema();
        assertTrue(relySchema.getPrimaryKey().isPresent());
        final UniqueConstraint primaryKey = relySchema.getPrimaryKey().get();
        assertEquals(2, primaryKey.getColumns().size());
        assertTrue(primaryKey.getColumns().containsAll(Arrays.asList("x", "z")));

        // test norely PK constraints
        env.executeSql("create table db1.tbl2 (x tinyint,y smallint, primary key (x) disable norely)");
        final TableSchema noRelySchema =
                hiveCatalog.getTable(new ObjectPath("db1", "tbl2")).getSchema();
        assertFalse(noRelySchema.getPrimaryKey().isPresent());

        // test table w/o PK
        env.executeSql("create table db1.tbl3 (x tinyint)");
        final TableSchema keylessSchema =
                hiveCatalog.getTable(new ObjectPath("db1", "tbl3")).getSchema();
        assertFalse(keylessSchema.getPrimaryKey().isPresent());
    } finally {
        // Always remove the database together with the tables created above.
        env.executeSql("drop database db1 cascade");
    }
}
Also used : CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) ObjectPath(org.apache.flink.table.catalog.ObjectPath) TableSchema(org.apache.flink.table.api.TableSchema) UniqueConstraint(org.apache.flink.table.api.constraints.UniqueConstraint) TableEnvironment(org.apache.flink.table.api.TableEnvironment) Test(org.junit.Test)

Example 49 with TableSchema

use of org.apache.flink.table.api.TableSchema in project flink by apache.

the class TableEnvHiveConnectorITCase method testNotNullConstraints.

/**
 * Checks which NOT NULL declarations in Hive DDL are reflected as non-nullable column types in
 * the {@link TableSchema} returned by the Hive catalog. Skipped unless
 * {@code HiveVersionTestUtil.HIVE_310_OR_LATER} holds.
 */
@Test
public void testNotNullConstraints() throws Exception {
    Assume.assumeTrue(HiveVersionTestUtil.HIVE_310_OR_LATER);
    final TableEnvironment env = getTableEnvWithHiveCatalog();
    env.executeSql("create database db1");
    try {
        env.executeSql("create table db1.tbl (x int,y bigint not null enable rely,z string not null enable norely)");
        final ObjectPath path = new ObjectPath("db1", "tbl");
        final TableSchema schema = hiveCatalog.getTable(path).getSchema();
        // column x: no constraint declared
        assertTrue(
                "By default columns should be nullable",
                schema.getFieldDataTypes()[0].getLogicalType().isNullable());
        // column y: NOT NULL ... RELY
        assertFalse(
                "NOT NULL columns should be reflected in table schema",
                schema.getFieldDataTypes()[1].getLogicalType().isNullable());
        // column z: NOT NULL ... NORELY
        assertTrue(
                "NOT NULL NORELY columns should be considered nullable",
                schema.getFieldDataTypes()[2].getLogicalType().isNullable());
    } finally {
        // Always remove the database together with the table created above.
        env.executeSql("drop database db1 cascade");
    }
}
Also used : CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) ObjectPath(org.apache.flink.table.catalog.ObjectPath) TableSchema(org.apache.flink.table.api.TableSchema) TableEnvironment(org.apache.flink.table.api.TableEnvironment) Test(org.junit.Test)

Example 50 with TableSchema

use of org.apache.flink.table.api.TableSchema in project flink by apache.

the class HiveDialectITCase method testCreateTableWithConstraints.

/**
 * Creates a table with a named RELY primary key plus RELY and NORELY NOT NULL columns, then
 * checks the primary key name and per-column nullability in the resulting {@link TableSchema}.
 * Skipped unless {@code HiveVersionTestUtil.HIVE_310_OR_LATER} holds.
 */
@Test
public void testCreateTableWithConstraints() throws Exception {
    Assume.assumeTrue(HiveVersionTestUtil.HIVE_310_OR_LATER);
    tableEnv.executeSql("create table tbl (x int,y int not null disable novalidate rely,z int not null disable novalidate norely,constraint pk_name primary key (x) disable rely)");
    final ObjectPath path = new ObjectPath("default", "tbl");
    final CatalogTable table = (CatalogTable) hiveCatalog.getTable(path);
    final TableSchema schema = table.getSchema();
    // primary key on column x
    assertTrue("PK not present", schema.getPrimaryKey().isPresent());
    assertEquals("pk_name", schema.getPrimaryKey().get().getName());
    assertFalse(
            "PK cannot be null",
            schema.getFieldDataTypes()[0].getLogicalType().isNullable());
    // column y: NOT NULL ... RELY
    assertFalse(
            "RELY NOT NULL should be reflected in schema",
            schema.getFieldDataTypes()[1].getLogicalType().isNullable());
    // column z: NOT NULL ... NORELY
    assertTrue(
            "NORELY NOT NULL shouldn't be reflected in schema",
            schema.getFieldDataTypes()[2].getLogicalType().isNullable());
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) TableSchema(org.apache.flink.table.api.TableSchema) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Test(org.junit.Test)

Aggregations

TableSchema (org.apache.flink.table.api.TableSchema): 86, Test (org.junit.Test): 54, HashMap (java.util.HashMap): 26, CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl): 21, SqlNode (org.apache.calcite.sql.SqlNode): 19, ObjectPath (org.apache.flink.table.catalog.ObjectPath): 19, CatalogTable (org.apache.flink.table.catalog.CatalogTable): 18, DataType (org.apache.flink.table.types.DataType): 16, ValidationException (org.apache.flink.table.api.ValidationException): 14, TableColumn (org.apache.flink.table.api.TableColumn): 10, UniqueConstraint (org.apache.flink.table.api.constraints.UniqueConstraint): 10, ArrayList (java.util.ArrayList): 9, List (java.util.List): 9, Map (java.util.Map): 9, FeatureOption (org.apache.flink.sql.parser.ddl.SqlTableLike.FeatureOption): 9, MergingStrategy (org.apache.flink.sql.parser.ddl.SqlTableLike.MergingStrategy): 9, CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 8, ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier): 8, Arrays (java.util.Arrays): 7, Configuration (org.apache.flink.configuration.Configuration): 7