use of org.apache.flink.table.api.TableSchema in project flink by apache.
the class HiveCatalogGenericMetadataTest method testTableSchemaCompatibility.
@Test
// NOTE: Be careful when modifying this test; it is important for backward compatibility.
public void testTableSchemaCompatibility() throws Exception {
catalog.createDatabase(db1, createDb(), false);
try {
// table with numeric types
ObjectPath tablePath = new ObjectPath(db1, "generic1");
Table hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(), tablePath.getObjectName());
hiveTable.setDbName(tablePath.getDatabaseName());
hiveTable.setTableName(tablePath.getObjectName());
setLegacyGeneric(hiveTable.getParameters());
hiveTable.getParameters().put("flink.generic.table.schema.0.name", "ti");
hiveTable.getParameters().put("flink.generic.table.schema.0.data-type", "TINYINT");
hiveTable.getParameters().put("flink.generic.table.schema.1.name", "si");
hiveTable.getParameters().put("flink.generic.table.schema.1.data-type", "SMALLINT");
hiveTable.getParameters().put("flink.generic.table.schema.2.name", "i");
hiveTable.getParameters().put("flink.generic.table.schema.2.data-type", "INT");
hiveTable.getParameters().put("flink.generic.table.schema.3.name", "bi");
hiveTable.getParameters().put("flink.generic.table.schema.3.data-type", "BIGINT");
hiveTable.getParameters().put("flink.generic.table.schema.4.name", "f");
hiveTable.getParameters().put("flink.generic.table.schema.4.data-type", "FLOAT");
hiveTable.getParameters().put("flink.generic.table.schema.5.name", "d");
hiveTable.getParameters().put("flink.generic.table.schema.5.data-type", "DOUBLE");
hiveTable.getParameters().put("flink.generic.table.schema.6.name", "de");
hiveTable.getParameters().put("flink.generic.table.schema.6.data-type", "DECIMAL(10, 5)");
hiveTable.getParameters().put("flink.generic.table.schema.7.name", "cost");
hiveTable.getParameters().put("flink.generic.table.schema.7.expr", "`d` * `bi`");
hiveTable.getParameters().put("flink.generic.table.schema.7.data-type", "DOUBLE");
((HiveCatalog) catalog).client.createTable(hiveTable);
CatalogBaseTable catalogBaseTable = catalog.getTable(tablePath);
assertFalse(HiveCatalog.isHiveTable(catalogBaseTable.getOptions()));
TableSchema expectedSchema = TableSchema.builder().fields(new String[] { "ti", "si", "i", "bi", "f", "d", "de" }, new DataType[] { DataTypes.TINYINT(), DataTypes.SMALLINT(), DataTypes.INT(), DataTypes.BIGINT(), DataTypes.FLOAT(), DataTypes.DOUBLE(), DataTypes.DECIMAL(10, 5) }).field("cost", DataTypes.DOUBLE(), "`d` * `bi`").build();
assertEquals(expectedSchema, catalogBaseTable.getSchema());
// table with character types
tablePath = new ObjectPath(db1, "generic2");
hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(), tablePath.getObjectName());
hiveTable.setDbName(tablePath.getDatabaseName());
hiveTable.setTableName(tablePath.getObjectName());
setLegacyGeneric(hiveTable.getParameters());
hiveTable.getParameters().put("flink.generic.table.schema.0.name", "c");
hiveTable.getParameters().put("flink.generic.table.schema.0.data-type", "CHAR(265)");
hiveTable.getParameters().put("flink.generic.table.schema.1.name", "vc");
hiveTable.getParameters().put("flink.generic.table.schema.1.data-type", "VARCHAR(65536)");
hiveTable.getParameters().put("flink.generic.table.schema.2.name", "s");
hiveTable.getParameters().put("flink.generic.table.schema.2.data-type", "VARCHAR(2147483647)");
hiveTable.getParameters().put("flink.generic.table.schema.3.name", "b");
hiveTable.getParameters().put("flink.generic.table.schema.3.data-type", "BINARY(1)");
hiveTable.getParameters().put("flink.generic.table.schema.4.name", "vb");
hiveTable.getParameters().put("flink.generic.table.schema.4.data-type", "VARBINARY(255)");
hiveTable.getParameters().put("flink.generic.table.schema.5.name", "bs");
hiveTable.getParameters().put("flink.generic.table.schema.5.data-type", "VARBINARY(2147483647)");
hiveTable.getParameters().put("flink.generic.table.schema.6.name", "len");
hiveTable.getParameters().put("flink.generic.table.schema.6.expr", "CHAR_LENGTH(`s`)");
hiveTable.getParameters().put("flink.generic.table.schema.6.data-type", "INT");
((HiveCatalog) catalog).client.createTable(hiveTable);
catalogBaseTable = catalog.getTable(tablePath);
expectedSchema = TableSchema.builder().fields(new String[] { "c", "vc", "s", "b", "vb", "bs" }, new DataType[] { DataTypes.CHAR(265), DataTypes.VARCHAR(65536), DataTypes.STRING(), DataTypes.BINARY(1), DataTypes.VARBINARY(255), DataTypes.BYTES() }).field("len", DataTypes.INT(), "CHAR_LENGTH(`s`)").build();
assertEquals(expectedSchema, catalogBaseTable.getSchema());
// table with date/time types
tablePath = new ObjectPath(db1, "generic3");
hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(), tablePath.getObjectName());
hiveTable.setDbName(tablePath.getDatabaseName());
hiveTable.setTableName(tablePath.getObjectName());
setLegacyGeneric(hiveTable.getParameters());
hiveTable.getParameters().put("flink.generic.table.schema.0.name", "dt");
hiveTable.getParameters().put("flink.generic.table.schema.0.data-type", "DATE");
hiveTable.getParameters().put("flink.generic.table.schema.1.name", "t");
hiveTable.getParameters().put("flink.generic.table.schema.1.data-type", "TIME(0)");
hiveTable.getParameters().put("flink.generic.table.schema.2.name", "ts");
hiveTable.getParameters().put("flink.generic.table.schema.2.data-type", "TIMESTAMP(3)");
hiveTable.getParameters().put("flink.generic.table.schema.3.name", "tstz");
hiveTable.getParameters().put("flink.generic.table.schema.3.data-type", "TIMESTAMP(6) WITH LOCAL TIME ZONE");
hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.rowtime", "ts");
hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.data-type", "TIMESTAMP(3)");
hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.expr", "ts");
((HiveCatalog) catalog).client.createTable(hiveTable);
catalogBaseTable = catalog.getTable(tablePath);
expectedSchema = TableSchema.builder().fields(new String[] { "dt", "t", "ts", "tstz" }, new DataType[] { DataTypes.DATE(), DataTypes.TIME(), DataTypes.TIMESTAMP(3), DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE() }).watermark("ts", "ts", DataTypes.TIMESTAMP(3)).build();
assertEquals(expectedSchema, catalogBaseTable.getSchema());
// table with complex/misc types
tablePath = new ObjectPath(db1, "generic4");
hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(), tablePath.getObjectName());
hiveTable.setDbName(tablePath.getDatabaseName());
hiveTable.setTableName(tablePath.getObjectName());
setLegacyGeneric(hiveTable.getParameters());
hiveTable.getParameters().put("flink.generic.table.schema.0.name", "a");
hiveTable.getParameters().put("flink.generic.table.schema.0.data-type", "ARRAY<INT>");
hiveTable.getParameters().put("flink.generic.table.schema.1.name", "m");
hiveTable.getParameters().put("flink.generic.table.schema.1.data-type", "MAP<BIGINT, TIMESTAMP(6)>");
hiveTable.getParameters().put("flink.generic.table.schema.2.name", "mul");
hiveTable.getParameters().put("flink.generic.table.schema.2.data-type", "MULTISET<DOUBLE>");
hiveTable.getParameters().put("flink.generic.table.schema.3.name", "r");
hiveTable.getParameters().put("flink.generic.table.schema.3.data-type", "ROW<`f1` INT, `f2` VARCHAR(2147483647)>");
hiveTable.getParameters().put("flink.generic.table.schema.4.name", "b");
hiveTable.getParameters().put("flink.generic.table.schema.4.data-type", "BOOLEAN");
hiveTable.getParameters().put("flink.generic.table.schema.5.name", "ts");
hiveTable.getParameters().put("flink.generic.table.schema.5.data-type", "TIMESTAMP(3)");
hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.rowtime", "ts");
hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.data-type", "TIMESTAMP(3)");
hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.expr", "`ts` - INTERVAL '5' SECOND");
((HiveCatalog) catalog).client.createTable(hiveTable);
catalogBaseTable = catalog.getTable(tablePath);
expectedSchema = TableSchema.builder().fields(new String[] { "a", "m", "mul", "r", "b", "ts" }, new DataType[] { DataTypes.ARRAY(DataTypes.INT()), DataTypes.MAP(DataTypes.BIGINT(), DataTypes.TIMESTAMP()), DataTypes.MULTISET(DataTypes.DOUBLE()), DataTypes.ROW(DataTypes.FIELD("f1", DataTypes.INT()), DataTypes.FIELD("f2", DataTypes.STRING())), DataTypes.BOOLEAN(), DataTypes.TIMESTAMP(3) }).watermark("ts", "`ts` - INTERVAL '5' SECOND", DataTypes.TIMESTAMP(3)).build();
assertEquals(expectedSchema, catalogBaseTable.getSchema());
} finally {
catalog.dropDatabase(db1, true, true);
}
}
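For reference, a TableSchema can be serialized back into the legacy "flink.generic.table.schema.N.*" property layout used above with a small helper. This is a minimal sketch: the class name, method name, and PREFIX constant are illustrative assumptions (not Flink API), computed-column ".expr" keys are omitted, and only getFieldNames(), getFieldDataTypes(), getLogicalType(), and asSerializableString() are real Flink calls.
import java.util.HashMap;
import java.util.Map;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;

public class LegacySchemaProperties {
    // Illustrative assumption: the key prefix seen in the test parameters above.
    private static final String PREFIX = "flink.generic.table.schema.";

    // Writes one ".name" and one ".data-type" entry per column, mirroring the
    // parameters the test stores on the Hive table.
    static Map<String, String> toProperties(TableSchema schema) {
        Map<String, String> props = new HashMap<>();
        String[] names = schema.getFieldNames();
        for (int i = 0; i < names.length; i++) {
            props.put(PREFIX + i + ".name", names[i]);
            // asSerializableString() yields the SQL type string, e.g. "DECIMAL(10, 5)"
            props.put(PREFIX + i + ".data-type",
                    schema.getFieldDataTypes()[i].getLogicalType().asSerializableString());
        }
        return props;
    }

    public static void main(String[] args) {
        TableSchema schema = TableSchema.builder()
                .field("ti", DataTypes.TINYINT())
                .field("de", DataTypes.DECIMAL(10, 5))
                .build();
        toProperties(schema).forEach((k, v) -> System.out.println(k + "=" + v));
    }
}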
use of org.apache.flink.table.api.TableSchema in project flink by apache.
the class HiveCatalogITCase method testCsvTableViaAPI.
@Test
public void testCsvTableViaAPI() throws Exception {
TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
tableEnv.getConfig().addConfiguration(new Configuration().set(CoreOptions.DEFAULT_PARALLELISM, 1));
tableEnv.registerCatalog("myhive", hiveCatalog);
tableEnv.useCatalog("myhive");
final TableSchema schema = TableSchema.builder().field("name", DataTypes.STRING()).field("age", DataTypes.INT()).build();
final Map<String, String> sourceOptions = new HashMap<>();
sourceOptions.put("connector.type", "filesystem");
sourceOptions.put("connector.path", getClass().getResource("/csv/test.csv").getPath());
sourceOptions.put("format.type", "csv");
CatalogTable source = new CatalogTableImpl(schema, sourceOptions, "Comment.");
Path p = Paths.get(tempFolder.newFolder().getAbsolutePath(), "test.csv");
final Map<String, String> sinkOptions = new HashMap<>();
sinkOptions.put("connector.type", "filesystem");
sinkOptions.put("connector.path", p.toAbsolutePath().toString());
sinkOptions.put("format.type", "csv");
CatalogTable sink = new CatalogTableImpl(schema, sinkOptions, "Comment.");
hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sourceTableName), source, false);
hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName), sink, false);
Table t = tableEnv.sqlQuery(String.format("select * from myhive.`default`.%s", sourceTableName));
List<Row> result = CollectionUtil.iteratorToList(t.execute().collect());
result.sort(Comparator.comparing(String::valueOf));
// assert query result
assertThat(result).containsExactly(Row.of("1", 1), Row.of("2", 2), Row.of("3", 3));
tableEnv.executeSql(String.format("insert into myhive.`default`.%s select * from myhive.`default`.%s", sinkTableName, sourceTableName)).await();
// assert written result
File resultFile = new File(p.toAbsolutePath().toString());
try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
for (int i = 0; i < 3; i++) {
String readLine = reader.readLine();
assertThat(readLine).isEqualTo(String.format("%d,%d", i + 1, i + 1));
}
// no more lines
assertThat(reader.readLine()).isNull();
}
tableEnv.executeSql(String.format("DROP TABLE %s", sourceTableName));
tableEnv.executeSql(String.format("DROP TABLE %s", sinkTableName));
}
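For comparison, the same source registration can be expressed as SQL DDL instead of constructing a CatalogTableImpl. This is a minimal sketch, assuming a Flink version that still resolves the legacy connector.type/format.type descriptor keys used above; the class name, table name, and path literal are placeholders.
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class CsvDdlSketch {
    public static void main(String[] args) {
        TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
        // Same schema and options as the CatalogTableImpl above, expressed as DDL.
        tableEnv.executeSql(
                "CREATE TABLE csv_source (name STRING, age INT) WITH ("
                        + "'connector.type' = 'filesystem',"
                        + "'connector.path' = '/path/to/test.csv',"
                        + "'format.type' = 'csv')");
    }
}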
use of org.apache.flink.table.api.TableSchema in project flink by apache.
the class TableEnvHiveConnectorITCase method testPKConstraint.
@Test
public void testPKConstraint() throws Exception {
// PK constraints are supported since Hive 2.1.0, but they cannot be marked RELY in
// 2.x versions, so only test against Hive 3.x.
Assume.assumeTrue(HiveVersionTestUtil.HIVE_310_OR_LATER);
TableEnvironment tableEnv = getTableEnvWithHiveCatalog();
tableEnv.executeSql("create database db1");
try {
// test rely PK constraints
tableEnv.executeSql("create table db1.tbl1 (x tinyint,y smallint,z int, primary key (x,z) disable novalidate rely)");
CatalogBaseTable catalogTable = hiveCatalog.getTable(new ObjectPath("db1", "tbl1"));
TableSchema tableSchema = catalogTable.getSchema();
assertTrue(tableSchema.getPrimaryKey().isPresent());
UniqueConstraint pk = tableSchema.getPrimaryKey().get();
assertEquals(2, pk.getColumns().size());
assertTrue(pk.getColumns().containsAll(Arrays.asList("x", "z")));
// test norely PK constraints
tableEnv.executeSql("create table db1.tbl2 (x tinyint,y smallint, primary key (x) disable norely)");
catalogTable = hiveCatalog.getTable(new ObjectPath("db1", "tbl2"));
tableSchema = catalogTable.getSchema();
assertFalse(tableSchema.getPrimaryKey().isPresent());
// test table w/o PK
tableEnv.executeSql("create table db1.tbl3 (x tinyint)");
catalogTable = hiveCatalog.getTable(new ObjectPath("db1", "tbl3"));
tableSchema = catalogTable.getSchema();
assertFalse(tableSchema.getPrimaryKey().isPresent());
} finally {
tableEnv.executeSql("drop database db1 cascade");
}
}
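The same primary-key metadata can be built and inspected directly on a TableSchema, without going through Hive DDL. A minimal sketch using the builder and UniqueConstraint APIs exercised by the test above (the class name is illustrative); note that primary-key columns must be declared NOT NULL.
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.constraints.UniqueConstraint;

public class PrimaryKeySketch {
    public static void main(String[] args) {
        // Primary-key columns must be non-nullable, hence notNull() on x and z.
        TableSchema schema = TableSchema.builder()
                .field("x", DataTypes.TINYINT().notNull())
                .field("y", DataTypes.SMALLINT())
                .field("z", DataTypes.INT().notNull())
                .primaryKey("x", "z")
                .build();
        UniqueConstraint pk = schema.getPrimaryKey().orElseThrow(IllegalStateException::new);
        // Prints the constraint name and its columns, e.g. "... -> [x, z]".
        System.out.println(pk.getName() + " -> " + pk.getColumns());
    }
}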
use of org.apache.flink.table.api.TableSchema in project flink by apache.
the class TableEnvHiveConnectorITCase method testNotNullConstraints.
@Test
public void testNotNullConstraints() throws Exception {
Assume.assumeTrue(HiveVersionTestUtil.HIVE_310_OR_LATER);
TableEnvironment tableEnv = getTableEnvWithHiveCatalog();
tableEnv.executeSql("create database db1");
try {
tableEnv.executeSql("create table db1.tbl (x int,y bigint not null enable rely,z string not null enable norely)");
CatalogBaseTable catalogTable = hiveCatalog.getTable(new ObjectPath("db1", "tbl"));
TableSchema tableSchema = catalogTable.getSchema();
assertTrue("By default columns should be nullable", tableSchema.getFieldDataTypes()[0].getLogicalType().isNullable());
assertFalse("NOT NULL columns should be reflected in table schema", tableSchema.getFieldDataTypes()[1].getLogicalType().isNullable());
assertTrue("NOT NULL NORELY columns should be considered nullable", tableSchema.getFieldDataTypes()[2].getLogicalType().isNullable());
} finally {
tableEnv.executeSql("drop database db1 cascade");
}
}
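The nullability behavior asserted above comes straight from the DataType API: types are nullable by default, and notNull() flips the flag on the underlying logical type. A minimal sketch (class name illustrative):
import org.apache.flink.table.api.DataTypes;

public class NullabilitySketch {
    public static void main(String[] args) {
        // Nullable by default -> prints true.
        System.out.println(DataTypes.INT().getLogicalType().isNullable());
        // notNull() produces a non-nullable copy -> prints false.
        System.out.println(DataTypes.BIGINT().notNull().getLogicalType().isNullable());
    }
}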
use of org.apache.flink.table.api.TableSchema in project flink by apache.
the class HiveDialectITCase method testCreateTableWithConstraints.
@Test
public void testCreateTableWithConstraints() throws Exception {
Assume.assumeTrue(HiveVersionTestUtil.HIVE_310_OR_LATER);
tableEnv.executeSql("create table tbl (x int,y int not null disable novalidate rely,z int not null disable novalidate norely," + "constraint pk_name primary key (x) disable rely)");
CatalogTable catalogTable = (CatalogTable) hiveCatalog.getTable(new ObjectPath("default", "tbl"));
TableSchema tableSchema = catalogTable.getSchema();
assertTrue("PK not present", tableSchema.getPrimaryKey().isPresent());
assertEquals("pk_name", tableSchema.getPrimaryKey().get().getName());
assertFalse("PK cannot be null", tableSchema.getFieldDataTypes()[0].getLogicalType().isNullable());
assertFalse("RELY NOT NULL should be reflected in schema", tableSchema.getFieldDataTypes()[1].getLogicalType().isNullable());
assertTrue("NORELY NOT NULL shouldn't be reflected in schema", tableSchema.getFieldDataTypes()[2].getLogicalType().isNullable());
}
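The named constraint asserted here ("pk_name") maps to the builder overload that takes an explicit constraint name. A minimal sketch mirroring the schema the DDL above should produce (class name illustrative):
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;

public class NamedPrimaryKeySketch {
    public static void main(String[] args) {
        // x is the PK column and y carries a RELY NOT NULL, so both are notNull();
        // z's NORELY NOT NULL is not reflected in the schema, so it stays nullable.
        TableSchema schema = TableSchema.builder()
                .field("x", DataTypes.INT().notNull())
                .field("y", DataTypes.INT().notNull())
                .field("z", DataTypes.INT())
                .primaryKey("pk_name", new String[] {"x"})
                .build();
        System.out.println(schema.getPrimaryKey().get().getName()); // pk_name
    }
}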