
Example 56 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

From class HiveDeserializeExceptionTest, method parameters().

@Parameterized.Parameters(name = "{1}")
public static Object[] parameters() {
    HiveWriterFactory writerFactory =
            new HiveWriterFactory(
                    new JobConf(),
                    HiveIgnoreKeyTextOutputFormat.class,
                    new SerDeInfo(),
                    TableSchema.builder().build(),
                    new String[0],
                    new Properties(),
                    HiveShimLoader.loadHiveShim(HiveShimLoader.getHiveVersion()),
                    false);
    HiveCompactReaderFactory compactReaderFactory =
            new HiveCompactReaderFactory(
                    new StorageDescriptor(),
                    new Properties(),
                    new JobConf(),
                    new CatalogTableImpl(TableSchema.builder().build(), Collections.emptyMap(), null),
                    HiveShimLoader.getHiveVersion(),
                    RowType.of(DataTypes.INT().getLogicalType()),
                    false);
    HiveSourceBuilder builder =
            new HiveSourceBuilder(
                    new JobConf(),
                    new Configuration(),
                    new ObjectPath("default", "foo"),
                    HiveShimLoader.getHiveVersion(),
                    new CatalogTableImpl(
                            TableSchema.builder().field("i", DataTypes.INT()).build(),
                            Collections.emptyMap(),
                            null));
    builder.setPartitions(
            Collections.singletonList(
                    new HiveTablePartition(new StorageDescriptor(), new Properties())));
    HiveSource<RowData> hiveSource = builder.buildWithDefaultBulkFormat();
    return new Object[][] {
        new Object[] {writerFactory, writerFactory.getClass().getSimpleName()},
        new Object[] {compactReaderFactory, compactReaderFactory.getClass().getSimpleName()},
        new Object[] {hiveSource, hiveSource.getClass().getSimpleName()}
    };
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) Configuration(org.apache.flink.configuration.Configuration) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) HiveCompactReaderFactory(org.apache.flink.connectors.hive.read.HiveCompactReaderFactory) Properties(java.util.Properties) RowData(org.apache.flink.table.data.RowData) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) HiveWriterFactory(org.apache.flink.connectors.hive.write.HiveWriterFactory) JobConf(org.apache.hadoop.mapred.JobConf)
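
ObjectPath itself is just a (database name, object name) pair used to address tables, views, functions, and partitions inside a catalog. A minimal sketch of its accessors, assuming only the constructor used above plus Flink's documented fromString parser for dotted names:

import org.apache.flink.table.catalog.ObjectPath;

public class ObjectPathSketch {
    public static void main(String[] args) {
        // address table "foo" in database "default", as in the builder above
        ObjectPath direct = new ObjectPath("default", "foo");
        System.out.println(direct.getDatabaseName()); // default
        System.out.println(direct.getObjectName()); // foo
        System.out.println(direct.getFullName()); // default.foo

        // the same path parsed from its dotted string form
        ObjectPath parsed = ObjectPath.fromString("default.foo");
        System.out.println(direct.equals(parsed)); // true
    }
}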

Example 57 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

From class HiveInputFormatPartitionReaderITCase, method testReadFormat().

private void testReadFormat(TableEnvironment tableEnv, HiveCatalog hiveCatalog, String format) throws Exception {
    String tableName = prepareData(tableEnv, format);
    ObjectPath tablePath = new ObjectPath("default", tableName);
    TableSchema tableSchema = hiveCatalog.getTable(tablePath).getSchema();
    // create partition reader
    HiveInputFormatPartitionReader partitionReader =
            new HiveInputFormatPartitionReader(
                    new Configuration(),
                    new JobConf(hiveCatalog.getHiveConf()),
                    hiveCatalog.getHiveVersion(),
                    tablePath,
                    tableSchema.getFieldDataTypes(),
                    tableSchema.getFieldNames(),
                    Collections.emptyList(),
                    null,
                    false);
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    // create HiveTablePartition to read from
    HiveTablePartition tablePartition =
            new HiveTablePartition(
                    hiveTable.getSd(),
                    HiveReflectionUtils.getTableMetadata(
                            HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion()), hiveTable));
    partitionReader.open(Collections.singletonList(tablePartition));
    GenericRowData reuse = new GenericRowData(tableSchema.getFieldCount());
    int count = 0;
    // this follows the way the partition reader is used during lookup join
    while (partitionReader.read(reuse) != null) {
        count++;
    }
    assertEquals(CollectionUtil.iteratorToList(tableEnv.executeSql("select * from " + tableName).collect()).size(), count);
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) HiveTablePartition(org.apache.flink.connectors.hive.HiveTablePartition) Table(org.apache.hadoop.hive.metastore.api.Table) TableSchema(org.apache.flink.table.api.TableSchema) Configuration(org.apache.flink.configuration.Configuration) GenericRowData(org.apache.flink.table.data.GenericRowData) JobConf(org.apache.hadoop.mapred.JobConf)
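
The read loop above follows the general PartitionReader contract: open with a list of partitions, call read with a reuse record until it returns null, then close. A hedged sketch of that contract factored into a helper; countRows is a hypothetical utility, not Flink API, and the PartitionReader package shown here has moved between Flink versions:

import java.util.List;
import org.apache.flink.connector.file.table.PartitionReader;

public final class PartitionReaderUtil {
    // Count records across all partitions. The reuse record is mutated in
    // place, so the loop only checks read() for null instead of collecting.
    public static <P, T> long countRows(PartitionReader<P, T> reader, List<P> partitions, T reuse) throws Exception {
        reader.open(partitions);
        try {
            long count = 0;
            while (reader.read(reuse) != null) {
                count++;
            }
            return count;
        } finally {
            reader.close();
        }
    }
}

With it, the counting loop in the test would reduce to countRows(partitionReader, Collections.singletonList(tablePartition), reuse).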

Example 58 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

From class HiveCatalogGenericMetadataTest, method testTableSchemaCompatibility().

@Test
// NOTE: Be careful when modifying this test; it is important for backward compatibility
public void testTableSchemaCompatibility() throws Exception {
    catalog.createDatabase(db1, createDb(), false);
    try {
        // table with numeric types
        ObjectPath tablePath = new ObjectPath(db1, "generic1");
        Table hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(), tablePath.getObjectName());
        hiveTable.setDbName(tablePath.getDatabaseName());
        hiveTable.setTableName(tablePath.getObjectName());
        setLegacyGeneric(hiveTable.getParameters());
        hiveTable.getParameters().put("flink.generic.table.schema.0.name", "ti");
        hiveTable.getParameters().put("flink.generic.table.schema.0.data-type", "TINYINT");
        hiveTable.getParameters().put("flink.generic.table.schema.1.name", "si");
        hiveTable.getParameters().put("flink.generic.table.schema.1.data-type", "SMALLINT");
        hiveTable.getParameters().put("flink.generic.table.schema.2.name", "i");
        hiveTable.getParameters().put("flink.generic.table.schema.2.data-type", "INT");
        hiveTable.getParameters().put("flink.generic.table.schema.3.name", "bi");
        hiveTable.getParameters().put("flink.generic.table.schema.3.data-type", "BIGINT");
        hiveTable.getParameters().put("flink.generic.table.schema.4.name", "f");
        hiveTable.getParameters().put("flink.generic.table.schema.4.data-type", "FLOAT");
        hiveTable.getParameters().put("flink.generic.table.schema.5.name", "d");
        hiveTable.getParameters().put("flink.generic.table.schema.5.data-type", "DOUBLE");
        hiveTable.getParameters().put("flink.generic.table.schema.6.name", "de");
        hiveTable.getParameters().put("flink.generic.table.schema.6.data-type", "DECIMAL(10, 5)");
        hiveTable.getParameters().put("flink.generic.table.schema.7.name", "cost");
        hiveTable.getParameters().put("flink.generic.table.schema.7.expr", "`d` * `bi`");
        hiveTable.getParameters().put("flink.generic.table.schema.7.data-type", "DOUBLE");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        CatalogBaseTable catalogBaseTable = catalog.getTable(tablePath);
        assertFalse(HiveCatalog.isHiveTable(catalogBaseTable.getOptions()));
        TableSchema expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] {"ti", "si", "i", "bi", "f", "d", "de"},
                                new DataType[] {
                                    DataTypes.TINYINT(), DataTypes.SMALLINT(), DataTypes.INT(),
                                    DataTypes.BIGINT(), DataTypes.FLOAT(), DataTypes.DOUBLE(),
                                    DataTypes.DECIMAL(10, 5)
                                })
                        .field("cost", DataTypes.DOUBLE(), "`d` * `bi`")
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());
        // table with character types
        tablePath = new ObjectPath(db1, "generic2");
        hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(), tablePath.getObjectName());
        hiveTable.setDbName(tablePath.getDatabaseName());
        hiveTable.setTableName(tablePath.getObjectName());
        setLegacyGeneric(hiveTable.getParameters());
        hiveTable.getParameters().put("flink.generic.table.schema.0.name", "c");
        hiveTable.getParameters().put("flink.generic.table.schema.0.data-type", "CHAR(265)");
        hiveTable.getParameters().put("flink.generic.table.schema.1.name", "vc");
        hiveTable.getParameters().put("flink.generic.table.schema.1.data-type", "VARCHAR(65536)");
        hiveTable.getParameters().put("flink.generic.table.schema.2.name", "s");
        hiveTable.getParameters().put("flink.generic.table.schema.2.data-type", "VARCHAR(2147483647)");
        hiveTable.getParameters().put("flink.generic.table.schema.3.name", "b");
        hiveTable.getParameters().put("flink.generic.table.schema.3.data-type", "BINARY(1)");
        hiveTable.getParameters().put("flink.generic.table.schema.4.name", "vb");
        hiveTable.getParameters().put("flink.generic.table.schema.4.data-type", "VARBINARY(255)");
        hiveTable.getParameters().put("flink.generic.table.schema.5.name", "bs");
        hiveTable.getParameters().put("flink.generic.table.schema.5.data-type", "VARBINARY(2147483647)");
        hiveTable.getParameters().put("flink.generic.table.schema.6.name", "len");
        hiveTable.getParameters().put("flink.generic.table.schema.6.expr", "CHAR_LENGTH(`s`)");
        hiveTable.getParameters().put("flink.generic.table.schema.6.data-type", "INT");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        catalogBaseTable = catalog.getTable(tablePath);
        expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] {"c", "vc", "s", "b", "vb", "bs"},
                                new DataType[] {
                                    DataTypes.CHAR(265), DataTypes.VARCHAR(65536), DataTypes.STRING(),
                                    DataTypes.BINARY(1), DataTypes.VARBINARY(255), DataTypes.BYTES()
                                })
                        .field("len", DataTypes.INT(), "CHAR_LENGTH(`s`)")
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());
        // table with date/time types
        tablePath = new ObjectPath(db1, "generic3");
        hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(), tablePath.getObjectName());
        hiveTable.setDbName(tablePath.getDatabaseName());
        hiveTable.setTableName(tablePath.getObjectName());
        setLegacyGeneric(hiveTable.getParameters());
        hiveTable.getParameters().put("flink.generic.table.schema.0.name", "dt");
        hiveTable.getParameters().put("flink.generic.table.schema.0.data-type", "DATE");
        hiveTable.getParameters().put("flink.generic.table.schema.1.name", "t");
        hiveTable.getParameters().put("flink.generic.table.schema.1.data-type", "TIME(0)");
        hiveTable.getParameters().put("flink.generic.table.schema.2.name", "ts");
        hiveTable.getParameters().put("flink.generic.table.schema.2.data-type", "TIMESTAMP(3)");
        hiveTable.getParameters().put("flink.generic.table.schema.3.name", "tstz");
        hiveTable.getParameters().put("flink.generic.table.schema.3.data-type", "TIMESTAMP(6) WITH LOCAL TIME ZONE");
        hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.rowtime", "ts");
        hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.data-type", "TIMESTAMP(3)");
        hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.expr", "ts");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        catalogBaseTable = catalog.getTable(tablePath);
        expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] {"dt", "t", "ts", "tstz"},
                                new DataType[] {
                                    DataTypes.DATE(), DataTypes.TIME(), DataTypes.TIMESTAMP(3),
                                    DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE()
                                })
                        .watermark("ts", "ts", DataTypes.TIMESTAMP(3))
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());
        // table with complex/misc types
        tablePath = new ObjectPath(db1, "generic4");
        hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(), tablePath.getObjectName());
        hiveTable.setDbName(tablePath.getDatabaseName());
        hiveTable.setTableName(tablePath.getObjectName());
        setLegacyGeneric(hiveTable.getParameters());
        hiveTable.getParameters().put("flink.generic.table.schema.0.name", "a");
        hiveTable.getParameters().put("flink.generic.table.schema.0.data-type", "ARRAY<INT>");
        hiveTable.getParameters().put("flink.generic.table.schema.1.name", "m");
        hiveTable.getParameters().put("flink.generic.table.schema.1.data-type", "MAP<BIGINT, TIMESTAMP(6)>");
        hiveTable.getParameters().put("flink.generic.table.schema.2.name", "mul");
        hiveTable.getParameters().put("flink.generic.table.schema.2.data-type", "MULTISET<DOUBLE>");
        hiveTable.getParameters().put("flink.generic.table.schema.3.name", "r");
        hiveTable.getParameters().put("flink.generic.table.schema.3.data-type", "ROW<`f1` INT, `f2` VARCHAR(2147483647)>");
        hiveTable.getParameters().put("flink.generic.table.schema.4.name", "b");
        hiveTable.getParameters().put("flink.generic.table.schema.4.data-type", "BOOLEAN");
        hiveTable.getParameters().put("flink.generic.table.schema.5.name", "ts");
        hiveTable.getParameters().put("flink.generic.table.schema.5.data-type", "TIMESTAMP(3)");
        hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.rowtime", "ts");
        hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.data-type", "TIMESTAMP(3)");
        hiveTable.getParameters().put("flink.generic.table.schema.watermark.0.strategy.expr", "`ts` - INTERVAL '5' SECOND");
        ((HiveCatalog) catalog).client.createTable(hiveTable);
        catalogBaseTable = catalog.getTable(tablePath);
        expectedSchema =
                TableSchema.builder()
                        .fields(
                                new String[] {"a", "m", "mul", "r", "b", "ts"},
                                new DataType[] {
                                    DataTypes.ARRAY(DataTypes.INT()),
                                    DataTypes.MAP(DataTypes.BIGINT(), DataTypes.TIMESTAMP()),
                                    DataTypes.MULTISET(DataTypes.DOUBLE()),
                                    DataTypes.ROW(
                                            DataTypes.FIELD("f1", DataTypes.INT()),
                                            DataTypes.FIELD("f2", DataTypes.STRING())),
                                    DataTypes.BOOLEAN(),
                                    DataTypes.TIMESTAMP(3)
                                })
                        .watermark("ts", "`ts` - INTERVAL '5' SECOND", DataTypes.TIMESTAMP(3))
                        .build();
        assertEquals(expectedSchema, catalogBaseTable.getSchema());
    } finally {
        catalog.dropDatabase(db1, true, true);
    }
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Table(org.apache.hadoop.hive.metastore.api.Table) TableSchema(org.apache.flink.table.api.TableSchema) DataType(org.apache.flink.table.types.DataType) Test(org.junit.Test)
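
The long runs of put calls in this test all follow the same flink.generic.table.schema.<index>.name / .data-type key scheme. A hedged sketch that derives those keys from parallel arrays; writeLegacySchemaParams is a hypothetical helper inferred from the key names above, not part of Flink's API, and the computed-column .expr and watermark keys would still be set individually:

import java.util.Map;

public final class LegacySchemaParams {
    private static final String PREFIX = "flink.generic.table.schema.";

    // Write one .name/.data-type pair per column using the legacy key
    // scheme, e.g. flink.generic.table.schema.0.name = "ti".
    public static void writeLegacySchemaParams(Map<String, String> params, String[] names, String[] dataTypes) {
        for (int i = 0; i < names.length; i++) {
            params.put(PREFIX + i + ".name", names[i]);
            params.put(PREFIX + i + ".data-type", dataTypes[i]);
        }
    }
}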

Example 59 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

From class HiveCatalogITCase, method testCreateTableLike().

@Test
public void testCreateTableLike() throws Exception {
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode();
    tableEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tableEnv.useCatalog(hiveCatalog.getName());
    tableEnv.executeSql("create table generic_table (x int) with ('connector'='COLLECTION')");
    tableEnv.useCatalog(EnvironmentSettings.DEFAULT_BUILTIN_CATALOG);
    tableEnv.executeSql(String.format("create table copy like `%s`.`default`.generic_table", hiveCatalog.getName()));
    Catalog builtInCat = tableEnv.getCatalog(EnvironmentSettings.DEFAULT_BUILTIN_CATALOG).get();
    CatalogBaseTable catalogTable = builtInCat.getTable(new ObjectPath(EnvironmentSettings.DEFAULT_BUILTIN_DATABASE, "copy"));
    assertThat(catalogTable.getOptions()).hasSize(1);
    assertThat(catalogTable.getOptions()).containsEntry(FactoryUtil.CONNECTOR.key(), "COLLECTION");
    assertThat(catalogTable.getSchema().getFieldCount()).isEqualTo(1);
    assertThat(catalogTable.getSchema().getFieldNames()).hasSameElementsAs(Collections.singletonList("x"));
    assertThat(catalogTable.getSchema().getFieldDataTypes()).hasSameElementsAs(Collections.singletonList(DataTypes.INT()));
}
Also used : CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) ObjectPath(org.apache.flink.table.catalog.ObjectPath) TableEnvironment(org.apache.flink.table.api.TableEnvironment) Catalog(org.apache.flink.table.catalog.Catalog) Test(org.junit.Test)
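
CREATE TABLE ... LIKE copies both the schema and the options of the source table by default, which is why the copied table still carries 'connector'='COLLECTION'. A minimal sketch of narrowing that behavior with Flink's documented like-options; the catalog, table, and connector names here are illustrative:

// Copy only the schema and supply a different connector; EXCLUDING OPTIONS
// drops the source table's options before the new WITH clause applies.
tableEnv.executeSql(
        "CREATE TABLE copy_schema_only WITH ('connector' = 'datagen') "
                + "LIKE `myhive`.`default`.generic_table (EXCLUDING OPTIONS)");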

Example 60 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

From class HiveCatalogITCase, method testCsvTableViaAPI().

@Test
public void testCsvTableViaAPI() throws Exception {
    TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
    tableEnv.getConfig().addConfiguration(new Configuration().set(CoreOptions.DEFAULT_PARALLELISM, 1));
    tableEnv.registerCatalog("myhive", hiveCatalog);
    tableEnv.useCatalog("myhive");
    final TableSchema schema = TableSchema.builder().field("name", DataTypes.STRING()).field("age", DataTypes.INT()).build();
    final Map<String, String> sourceOptions = new HashMap<>();
    sourceOptions.put("connector.type", "filesystem");
    sourceOptions.put("connector.path", getClass().getResource("/csv/test.csv").getPath());
    sourceOptions.put("format.type", "csv");
    CatalogTable source = new CatalogTableImpl(schema, sourceOptions, "Comment.");
    Path p = Paths.get(tempFolder.newFolder().getAbsolutePath(), "test.csv");
    final Map<String, String> sinkOptions = new HashMap<>();
    sinkOptions.put("connector.type", "filesystem");
    sinkOptions.put("connector.path", p.toAbsolutePath().toString());
    sinkOptions.put("format.type", "csv");
    CatalogTable sink = new CatalogTableImpl(schema, sinkOptions, "Comment.");
    hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sourceTableName), source, false);
    hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName), sink, false);
    Table t = tableEnv.sqlQuery(String.format("select * from myhive.`default`.%s", sourceTableName));
    List<Row> result = CollectionUtil.iteratorToList(t.execute().collect());
    result.sort(Comparator.comparing(String::valueOf));
    // assert query result
    assertThat(result).containsExactly(Row.of("1", 1), Row.of("2", 2), Row.of("3", 3));
    tableEnv.executeSql(String.format("insert into myhive.`default`.%s select * from myhive.`default`.%s", sinkTableName, sourceTableName)).await();
    // assert written result
    File resultFile = new File(p.toAbsolutePath().toString());
    try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
        String readLine;
        for (int i = 0; i < 3; i++) {
            readLine = reader.readLine();
            assertThat(readLine).isEqualTo(String.format("%d,%d", i + 1, i + 1));
        }
        // no more lines expected
        assertThat(reader.readLine()).isNull();
    }
    tableEnv.executeSql(String.format("DROP TABLE %s", sourceTableName));
    tableEnv.executeSql(String.format("DROP TABLE %s", sinkTableName));
}
Also used : Path(java.nio.file.Path) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Table(org.apache.flink.table.api.Table) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) Configuration(org.apache.flink.configuration.Configuration) TableSchema(org.apache.flink.table.api.TableSchema) HashMap(java.util.HashMap) TableEnvironment(org.apache.flink.table.api.TableEnvironment) UniqueConstraint(org.apache.flink.table.api.constraints.UniqueConstraint) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) Row(org.apache.flink.types.Row) File(java.io.File) Test(org.junit.Test)
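
The two trailing DROP TABLE statements could equally go through the catalog API that created the tables. A short sketch using Catalog#dropTable(ObjectPath, ignoreIfNotExists), which HiveCatalog implements; the effect is equivalent for these tables:

// Drop via the Catalog API instead of SQL; passing true ignores missing tables.
hiveCatalog.dropTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sourceTableName), true);
hiveCatalog.dropTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName), true);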

Aggregations (usage count per type across the indexed examples)

ObjectPath (org.apache.flink.table.catalog.ObjectPath): 81
Test (org.junit.Test): 52
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 32
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 29
HashMap (java.util.HashMap): 21
CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl): 20
TableSchema (org.apache.flink.table.api.TableSchema): 19
TableEnvironment (org.apache.flink.table.api.TableEnvironment): 17
CatalogPartitionSpec (org.apache.flink.table.catalog.CatalogPartitionSpec): 12
Table (org.apache.hadoop.hive.metastore.api.Table): 12
Configuration (org.apache.flink.configuration.Configuration): 11
SqlCreateHiveTable (org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable): 11
TableNotExistException (org.apache.flink.table.catalog.exceptions.TableNotExistException): 9
ArrayList (java.util.ArrayList): 8
Map (java.util.Map): 8
GenericInMemoryCatalog (org.apache.flink.table.catalog.GenericInMemoryCatalog): 8
LinkedHashMap (java.util.LinkedHashMap): 7
Catalog (org.apache.flink.table.catalog.Catalog): 7
ContextResolvedTable (org.apache.flink.table.catalog.ContextResolvedTable): 6
ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier): 6