use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
the class HiveDeserializeExceptionTest method parameters.
/**
 * Builds the parameter sets for this parameterized test.
 *
 * <p>Each entry is a two-element array: the object under test followed by the simple name of
 * its class. The second element (index 1) is referenced by the {@code name = "{1}"} pattern.
 *
 * @return an array of {@code Object[]} entries for a writer factory, a compact reader factory
 *     and a Hive source
 */
@Parameterized.Parameters(name = "{1}")
public static Object[] parameters() {
    final String hiveVersion = HiveShimLoader.getHiveVersion();

    // Case 1: writer factory built over an empty schema with no partition columns.
    final HiveWriterFactory hiveWriterFactory =
            new HiveWriterFactory(
                    new JobConf(),
                    HiveIgnoreKeyTextOutputFormat.class,
                    new SerDeInfo(),
                    TableSchema.builder().build(),
                    new String[0],
                    new Properties(),
                    HiveShimLoader.loadHiveShim(hiveVersion),
                    false);

    // Case 2: compact reader factory over an empty catalog table, producing a single INT column.
    final CatalogTableImpl emptyCatalogTable =
            new CatalogTableImpl(TableSchema.builder().build(), Collections.emptyMap(), null);
    final HiveCompactReaderFactory hiveCompactReaderFactory =
            new HiveCompactReaderFactory(
                    new StorageDescriptor(),
                    new Properties(),
                    new JobConf(),
                    emptyCatalogTable,
                    hiveVersion,
                    RowType.of(DataTypes.INT().getLogicalType()),
                    false);

    // Case 3: Hive source for table default.foo with one INT field and a single partition.
    final CatalogTableImpl singleIntFieldTable =
            new CatalogTableImpl(
                    TableSchema.builder().field("i", DataTypes.INT()).build(),
                    Collections.emptyMap(),
                    null);
    final HiveSourceBuilder sourceBuilder =
            new HiveSourceBuilder(
                    new JobConf(),
                    new Configuration(),
                    new ObjectPath("default", "foo"),
                    hiveVersion,
                    singleIntFieldTable);
    final HiveTablePartition onlyPartition =
            new HiveTablePartition(new StorageDescriptor(), new Properties());
    sourceBuilder.setPartitions(Collections.singletonList(onlyPartition));
    final HiveSource<RowData> source = sourceBuilder.buildWithDefaultBulkFormat();

    return new Object[][] {
        {hiveWriterFactory, hiveWriterFactory.getClass().getSimpleName()},
        {hiveCompactReaderFactory, hiveCompactReaderFactory.getClass().getSimpleName()},
        {source, source.getClass().getSimpleName()}
    };
}
use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
the class HiveInputFormatPartitionReaderITCase method testReadFormat.
/**
 * Reads a prepared table through a {@link HiveInputFormatPartitionReader} and verifies that the
 * number of rows read equals the number of rows returned by {@code select *} on the same table.
 *
 * @param tableEnv table environment used to prepare the data and to run the reference query
 * @param hiveCatalog catalog that provides the table schema, Hive conf, Hive version and the
 *     underlying Hive table
 * @param format format name forwarded to {@code prepareData}
 * @throws Exception if preparing, reading or querying the table fails
 */
private void testReadFormat(TableEnvironment tableEnv, HiveCatalog hiveCatalog, String format) throws Exception {
    String tableName = prepareData(tableEnv, format);
    ObjectPath tablePath = new ObjectPath("default", tableName);
    TableSchema tableSchema = hiveCatalog.getTable(tablePath).getSchema();
    // create partition reader
    HiveInputFormatPartitionReader partitionReader =
            new HiveInputFormatPartitionReader(
                    new Configuration(),
                    new JobConf(hiveCatalog.getHiveConf()),
                    hiveCatalog.getHiveVersion(),
                    tablePath,
                    tableSchema.getFieldDataTypes(),
                    tableSchema.getFieldNames(),
                    Collections.emptyList(),
                    null,
                    false);
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    // create HiveTablePartition to read from
    HiveTablePartition tablePartition =
            new HiveTablePartition(
                    hiveTable.getSd(),
                    HiveReflectionUtils.getTableMetadata(
                            HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion()), hiveTable));
    partitionReader.open(Collections.singletonList(tablePartition));
    int count = 0;
    try {
        GenericRowData reuse = new GenericRowData(tableSchema.getFieldCount());
        // this follows the way the partition reader is used during lookup join
        while (partitionReader.read(reuse) != null) {
            count++;
        }
    } finally {
        // Release the opened reader even when reading fails, so the test does not leak it.
        partitionReader.close();
    }
    assertEquals(
            CollectionUtil.iteratorToList(
                            tableEnv.executeSql("select * from " + tableName).collect())
                    .size(),
            count);
}
use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
the class HiveCatalogGenericMetadataTest method testTableSchemaCompatibility.
/**
 * Verifies that generic tables whose schema is stored under the legacy
 * {@code flink.generic.table.schema.*} Hive table parameters are still read back with the
 * expected {@link TableSchema}.
 *
 * <p>Four tables are covered: numeric types, character/binary types, date/time types with a
 * watermark, and complex/misc types with a watermark.
 *
 * <p>NOTE: Be careful when modifying this test; it is important for backward compatibility.
 * The property keys and values below must stay exactly as they are.
 *
 * @throws Exception if any catalog or metastore operation fails
 */
@Test
public void testTableSchemaCompatibility() throws Exception {
    catalog.createDatabase(db1, createDb(), false);
    try {
        // table with numeric types
        ObjectPath tablePath = new ObjectPath(db1, "generic1");
        Table hiveTable = newLegacyGenericHiveTable(tablePath);
        java.util.Map<String, String> params = hiveTable.getParameters();
        params.put("flink.generic.table.schema.0.name", "ti");
        params.put("flink.generic.table.schema.0.data-type", "TINYINT");
        params.put("flink.generic.table.schema.1.name", "si");
        params.put("flink.generic.table.schema.1.data-type", "SMALLINT");
        params.put("flink.generic.table.schema.2.name", "i");
        params.put("flink.generic.table.schema.2.data-type", "INT");
        params.put("flink.generic.table.schema.3.name", "bi");
        params.put("flink.generic.table.schema.3.data-type", "BIGINT");
        params.put("flink.generic.table.schema.4.name", "f");
        params.put("flink.generic.table.schema.4.data-type", "FLOAT");
        params.put("flink.generic.table.schema.5.name", "d");
        params.put("flink.generic.table.schema.5.data-type", "DOUBLE");
        params.put("flink.generic.table.schema.6.name", "de");
        params.put("flink.generic.table.schema.6.data-type", "DECIMAL(10, 5)");
        params.put("flink.generic.table.schema.7.name", "cost");
        params.put("flink.generic.table.schema.7.expr", "`d` * `bi`");
        params.put("flink.generic.table.schema.7.data-type", "DOUBLE");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        CatalogBaseTable catalogBaseTable = catalog.getTable(tablePath);
        assertFalse(HiveCatalog.isHiveTable(catalogBaseTable.getOptions()));
        TableSchema expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] { "ti", "si", "i", "bi", "f", "d", "de" },
                                new DataType[] {
                                    DataTypes.TINYINT(),
                                    DataTypes.SMALLINT(),
                                    DataTypes.INT(),
                                    DataTypes.BIGINT(),
                                    DataTypes.FLOAT(),
                                    DataTypes.DOUBLE(),
                                    DataTypes.DECIMAL(10, 5)
                                })
                        .field("cost", DataTypes.DOUBLE(), "`d` * `bi`")
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());

        // table with character types
        tablePath = new ObjectPath(db1, "generic2");
        hiveTable = newLegacyGenericHiveTable(tablePath);
        params = hiveTable.getParameters();
        params.put("flink.generic.table.schema.0.name", "c");
        params.put("flink.generic.table.schema.0.data-type", "CHAR(265)");
        params.put("flink.generic.table.schema.1.name", "vc");
        params.put("flink.generic.table.schema.1.data-type", "VARCHAR(65536)");
        params.put("flink.generic.table.schema.2.name", "s");
        params.put("flink.generic.table.schema.2.data-type", "VARCHAR(2147483647)");
        params.put("flink.generic.table.schema.3.name", "b");
        params.put("flink.generic.table.schema.3.data-type", "BINARY(1)");
        params.put("flink.generic.table.schema.4.name", "vb");
        params.put("flink.generic.table.schema.4.data-type", "VARBINARY(255)");
        params.put("flink.generic.table.schema.5.name", "bs");
        params.put("flink.generic.table.schema.5.data-type", "VARBINARY(2147483647)");
        params.put("flink.generic.table.schema.6.name", "len");
        params.put("flink.generic.table.schema.6.expr", "CHAR_LENGTH(`s`)");
        params.put("flink.generic.table.schema.6.data-type", "INT");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        catalogBaseTable = catalog.getTable(tablePath);
        expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] { "c", "vc", "s", "b", "vb", "bs" },
                                new DataType[] {
                                    DataTypes.CHAR(265),
                                    DataTypes.VARCHAR(65536),
                                    DataTypes.STRING(),
                                    DataTypes.BINARY(1),
                                    DataTypes.VARBINARY(255),
                                    DataTypes.BYTES()
                                })
                        .field("len", DataTypes.INT(), "CHAR_LENGTH(`s`)")
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());

        // table with date/time types
        tablePath = new ObjectPath(db1, "generic3");
        hiveTable = newLegacyGenericHiveTable(tablePath);
        params = hiveTable.getParameters();
        params.put("flink.generic.table.schema.0.name", "dt");
        params.put("flink.generic.table.schema.0.data-type", "DATE");
        params.put("flink.generic.table.schema.1.name", "t");
        params.put("flink.generic.table.schema.1.data-type", "TIME(0)");
        params.put("flink.generic.table.schema.2.name", "ts");
        params.put("flink.generic.table.schema.2.data-type", "TIMESTAMP(3)");
        params.put("flink.generic.table.schema.3.name", "tstz");
        params.put("flink.generic.table.schema.3.data-type", "TIMESTAMP(6) WITH LOCAL TIME ZONE");
        params.put("flink.generic.table.schema.watermark.0.rowtime", "ts");
        params.put("flink.generic.table.schema.watermark.0.strategy.data-type", "TIMESTAMP(3)");
        params.put("flink.generic.table.schema.watermark.0.strategy.expr", "ts");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        catalogBaseTable = catalog.getTable(tablePath);
        expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] { "dt", "t", "ts", "tstz" },
                                new DataType[] {
                                    DataTypes.DATE(),
                                    DataTypes.TIME(),
                                    DataTypes.TIMESTAMP(3),
                                    DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE()
                                })
                        .watermark("ts", "ts", DataTypes.TIMESTAMP(3))
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());

        // table with complex/misc types
        tablePath = new ObjectPath(db1, "generic4");
        hiveTable = newLegacyGenericHiveTable(tablePath);
        params = hiveTable.getParameters();
        params.put("flink.generic.table.schema.0.name", "a");
        params.put("flink.generic.table.schema.0.data-type", "ARRAY<INT>");
        params.put("flink.generic.table.schema.1.name", "m");
        params.put("flink.generic.table.schema.1.data-type", "MAP<BIGINT, TIMESTAMP(6)>");
        params.put("flink.generic.table.schema.2.name", "mul");
        params.put("flink.generic.table.schema.2.data-type", "MULTISET<DOUBLE>");
        params.put("flink.generic.table.schema.3.name", "r");
        params.put("flink.generic.table.schema.3.data-type", "ROW<`f1` INT, `f2` VARCHAR(2147483647)>");
        params.put("flink.generic.table.schema.4.name", "b");
        params.put("flink.generic.table.schema.4.data-type", "BOOLEAN");
        params.put("flink.generic.table.schema.5.name", "ts");
        params.put("flink.generic.table.schema.5.data-type", "TIMESTAMP(3)");
        params.put("flink.generic.table.schema.watermark.0.rowtime", "ts");
        params.put("flink.generic.table.schema.watermark.0.strategy.data-type", "TIMESTAMP(3)");
        params.put("flink.generic.table.schema.watermark.0.strategy.expr", "`ts` - INTERVAL '5' SECOND");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        catalogBaseTable = catalog.getTable(tablePath);
        expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] { "a", "m", "mul", "r", "b", "ts" },
                                new DataType[] {
                                    DataTypes.ARRAY(DataTypes.INT()),
                                    DataTypes.MAP(DataTypes.BIGINT(), DataTypes.TIMESTAMP()),
                                    DataTypes.MULTISET(DataTypes.DOUBLE()),
                                    DataTypes.ROW(
                                            DataTypes.FIELD("f1", DataTypes.INT()),
                                            DataTypes.FIELD("f2", DataTypes.STRING())),
                                    DataTypes.BOOLEAN(),
                                    DataTypes.TIMESTAMP(3)
                                })
                        .watermark("ts", "`ts` - INTERVAL '5' SECOND", DataTypes.TIMESTAMP(3))
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());
    } finally {
        catalog.dropDatabase(db1, true, true);
    }
}

/** Creates an empty Hive table for {@code tablePath} and marks it via {@code setLegacyGeneric}. */
private Table newLegacyGenericHiveTable(ObjectPath tablePath) {
    Table hiveTable =
            org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(
                    tablePath.getDatabaseName(), tablePath.getObjectName());
    hiveTable.setDbName(tablePath.getDatabaseName());
    hiveTable.setTableName(tablePath.getObjectName());
    setLegacyGeneric(hiveTable.getParameters());
    return hiveTable;
}
use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
the class HiveCatalogITCase method testCreateTableLike.
/**
 * Creates a generic table in the Hive catalog, copies it into the built-in catalog with
 * {@code CREATE TABLE ... LIKE}, and checks that the copy carries the same single option and
 * the same single-column schema.
 *
 * @throws Exception if any catalog or SQL operation fails
 */
@Test
public void testCreateTableLike() throws Exception {
    final TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode();
    final String hiveCatalogName = hiveCatalog.getName();
    final String builtInCatalogName = EnvironmentSettings.DEFAULT_BUILTIN_CATALOG;

    // Define the source table inside the Hive catalog.
    tableEnv.registerCatalog(hiveCatalogName, hiveCatalog);
    tableEnv.useCatalog(hiveCatalogName);
    tableEnv.executeSql("create table generic_table (x int) with ('connector'='COLLECTION')");

    // Switch to the built-in catalog and copy the table definition across catalogs.
    tableEnv.useCatalog(builtInCatalogName);
    final String createLikeStatement =
            String.format("create table copy like `%s`.`default`.generic_table", hiveCatalogName);
    tableEnv.executeSql(createLikeStatement);

    final Catalog builtInCat = tableEnv.getCatalog(builtInCatalogName).get();
    final ObjectPath copyPath =
            new ObjectPath(EnvironmentSettings.DEFAULT_BUILTIN_DATABASE, "copy");
    final CatalogBaseTable copiedTable = builtInCat.getTable(copyPath);

    // The copy must have exactly the connector option of the original table.
    assertThat(copiedTable.getOptions())
            .hasSize(1)
            .containsEntry(FactoryUtil.CONNECTOR.key(), "COLLECTION");

    // The copy must have exactly one INT column named "x".
    final TableSchema copiedSchema = copiedTable.getSchema();
    assertThat(copiedSchema.getFieldCount()).isEqualTo(1);
    assertThat(copiedSchema.getFieldNames())
            .hasSameElementsAs(Collections.singletonList("x"));
    assertThat(copiedSchema.getFieldDataTypes())
            .hasSameElementsAs(Collections.singletonList(DataTypes.INT()));
}
use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
the class HiveCatalogITCase method testCsvTableViaAPI.
/**
 * Registers a CSV filesystem source and sink in the Hive catalog through the catalog API,
 * queries the source, inserts its rows into the sink, and verifies both the query result and
 * the lines written to the sink file.
 *
 * @throws Exception if any catalog, SQL or file operation fails
 */
@Test
public void testCsvTableViaAPI() throws Exception {
    TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
    tableEnv.getConfig().addConfiguration(new Configuration().set(CoreOptions.DEFAULT_PARALLELISM, 1));
    tableEnv.registerCatalog("myhive", hiveCatalog);
    tableEnv.useCatalog("myhive");
    final TableSchema schema =
            TableSchema.builder()
                    .field("name", DataTypes.STRING())
                    .field("age", DataTypes.INT())
                    .build();

    final Map<String, String> sourceOptions = new HashMap<>();
    sourceOptions.put("connector.type", "filesystem");
    sourceOptions.put("connector.path", getClass().getResource("/csv/test.csv").getPath());
    sourceOptions.put("format.type", "csv");
    CatalogTable source = new CatalogTableImpl(schema, sourceOptions, "Comment.");

    Path p = Paths.get(tempFolder.newFolder().getAbsolutePath(), "test.csv");
    final Map<String, String> sinkOptions = new HashMap<>();
    sinkOptions.put("connector.type", "filesystem");
    sinkOptions.put("connector.path", p.toAbsolutePath().toString());
    sinkOptions.put("format.type", "csv");
    CatalogTable sink = new CatalogTableImpl(schema, sinkOptions, "Comment.");

    hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sourceTableName), source, false);
    hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName), sink, false);

    Table t = tableEnv.sqlQuery(String.format("select * from myhive.`default`.%s", sourceTableName));
    List<Row> result = CollectionUtil.iteratorToList(t.execute().collect());
    // Sort by string form so the comparison below does not depend on the collection order.
    result.sort(Comparator.comparing(String::valueOf));
    // assert query result
    assertThat(result).containsExactly(Row.of("1", 1), Row.of("2", 2), Row.of("3", 3));

    tableEnv.executeSql(
                    String.format(
                            "insert into myhive.`default`.%s select * from myhive.`default`.%s",
                            sinkTableName, sourceTableName))
            .await();

    // assert written result
    File resultFile = new File(p.toAbsolutePath().toString());
    // try-with-resources closes the reader (and its file handle) even if an assertion fails.
    try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
        String readLine;
        for (int i = 0; i < 3; i++) {
            readLine = reader.readLine();
            assertThat(readLine).isEqualTo(String.format("%d,%d", i + 1, i + 1));
        }
        // No more line
        assertThat(reader.readLine()).isNull();
    }

    tableEnv.executeSql(String.format("DROP TABLE %s", sourceTableName));
    tableEnv.executeSql(String.format("DROP TABLE %s", sinkTableName));
}
Aggregations