Search in sources :

Example 1 with CatalogTable

Use of org.apache.flink.table.catalog.CatalogTable in project flink by apache.

From the class HiveDynamicTableFactory, method createDynamicTableSource:

@Override
public DynamicTableSource createDynamicTableSource(Context context) {
    final ReadableConfig configuration = Configuration.fromMap(context.getCatalogTable().getOptions());
    final boolean isHiveTable = HiveCatalog.isHiveTable(context.getCatalogTable().getOptions());
    // we don't support temporary hive tables yet
    if (!isHiveTable || context.isTemporary()) {
        DynamicTableSource source = FactoryUtil.createDynamicTableSource(null, context.getObjectIdentifier(), context.getCatalogTable(), context.getConfiguration(), context.getClassLoader(), context.isTemporary());
        if (source instanceof RequireCatalogLock) {
            ((RequireCatalogLock) source).setLockFactory(HiveCatalogLock.createFactory(hiveConf));
        }
        return source;
    }
    final CatalogTable catalogTable = Preconditions.checkNotNull(context.getCatalogTable());
    final boolean isStreamingSource = configuration.get(STREAMING_SOURCE_ENABLE);
    final boolean includeAllPartition = STREAMING_SOURCE_PARTITION_INCLUDE.defaultValue().equals(configuration.get(STREAMING_SOURCE_PARTITION_INCLUDE));
    final JobConf jobConf = JobConfUtils.createJobConfWithCredentials(hiveConf);
    // Hive table source that does not have lookup ability
    if (isStreamingSource && includeAllPartition) {
        return new HiveTableSource(jobConf, context.getConfiguration(), context.getObjectIdentifier().toObjectPath(), catalogTable);
    } else {
        // hive table source that has scan and lookup ability
        return new HiveLookupTableSource(jobConf, context.getConfiguration(), context.getObjectIdentifier().toObjectPath(), catalogTable);
    }
}
Also used : ReadableConfig(org.apache.flink.configuration.ReadableConfig) RequireCatalogLock(org.apache.flink.table.connector.RequireCatalogLock) CatalogTable(org.apache.flink.table.catalog.CatalogTable) JobConf(org.apache.hadoop.mapred.JobConf) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource)
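
The branch taken here is driven entirely by the CatalogTable's string options. Below is a minimal, self-contained sketch that builds such a table and reproduces the same decision outside the factory; the keys "streaming-source.enable" and "streaming-source.partition.include" are assumed to be the strings behind HiveOptions.STREAMING_SOURCE_ENABLE and STREAMING_SOURCE_PARTITION_INCLUDE, and the class name is made up for illustration.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.CatalogTableImpl;

public class HiveSourceOptionSketch {
    public static void main(String[] args) {
        TableSchema schema = TableSchema.builder()
                .field("i", DataTypes.INT())
                .field("date", DataTypes.STRING())
                .build();
        Map<String, String> options = new HashMap<>();
        options.put("connector", "hive");
        // assumed option keys; the factory reads them via Configuration.fromMap(table.getOptions())
        options.put("streaming-source.enable", "true");
        options.put("streaming-source.partition.include", "all");
        CatalogTable table = new CatalogTableImpl(schema, Collections.singletonList("date"), options, null);

        boolean streaming = Boolean.parseBoolean(table.getOptions().get("streaming-source.enable"));
        boolean includeAll = "all".equals(table.getOptions().get("streaming-source.partition.include"));
        // mirrors the branch in createDynamicTableSource
        System.out.println(streaming && includeAll
                ? "factory would build a HiveTableSource (scan only)"
                : "factory would build a HiveLookupTableSource (scan + lookup)");
    }
}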

Example 2 with CatalogTable

Use of org.apache.flink.table.catalog.CatalogTable in project flink by apache.

From the class HiveTableUtil, method instantiateHiveTable:

public static Table instantiateHiveTable(ObjectPath tablePath, CatalogBaseTable table, HiveConf hiveConf, boolean managedTable) {
    final boolean isView = table instanceof CatalogView;
    // let Hive set default parameters for us, e.g. serialization.format
    Table hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(), tablePath.getObjectName());
    hiveTable.setCreateTime((int) (System.currentTimeMillis() / 1000));
    Map<String, String> properties = new HashMap<>(table.getOptions());
    if (managedTable) {
        properties.put(CONNECTOR.key(), ManagedTableFactory.DEFAULT_IDENTIFIER);
    }
    // Table comment
    if (table.getComment() != null) {
        properties.put(HiveCatalogConfig.COMMENT, table.getComment());
    }
    boolean isHiveTable = HiveCatalog.isHiveTable(properties);
    // Hive table's StorageDescriptor
    StorageDescriptor sd = hiveTable.getSd();
    HiveTableUtil.setDefaultStorageFormat(sd, hiveConf);
    // for non-Hive tables and views, the schema is stored as Flink properties; such views
    // cannot be used from Hive anyway, because Hive cannot understand the expanded query
    if (isHiveTable && !isView) {
        HiveTableUtil.initiateTableFromProperties(hiveTable, properties, hiveConf);
        List<FieldSchema> allColumns = HiveTableUtil.createHiveColumns(table.getSchema());
        // Table columns and partition keys
        if (table instanceof CatalogTable) {
            CatalogTable catalogTable = (CatalogTable) table;
            if (catalogTable.isPartitioned()) {
                int partitionKeySize = catalogTable.getPartitionKeys().size();
                List<FieldSchema> regularColumns = allColumns.subList(0, allColumns.size() - partitionKeySize);
                List<FieldSchema> partitionColumns = allColumns.subList(allColumns.size() - partitionKeySize, allColumns.size());
                sd.setCols(regularColumns);
                hiveTable.setPartitionKeys(partitionColumns);
            } else {
                sd.setCols(allColumns);
                hiveTable.setPartitionKeys(new ArrayList<>());
            }
        } else {
            sd.setCols(allColumns);
        }
        // Table properties
        hiveTable.getParameters().putAll(properties);
    } else {
        DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
        tableSchemaProps.putTableSchema(Schema.SCHEMA, table.getSchema());
        if (table instanceof CatalogTable) {
            tableSchemaProps.putPartitionKeys(((CatalogTable) table).getPartitionKeys());
        }
        properties.putAll(tableSchemaProps.asMap());
        properties = maskFlinkProperties(properties);
        // explicitly mark the table as generic for views and for tables created without connector properties
        if (isView || (!properties.containsKey(FLINK_PROPERTY_PREFIX + CONNECTOR.key()) && !properties.containsKey(FLINK_PROPERTY_PREFIX + CONNECTOR_TYPE))) {
            properties.put(IS_GENERIC, "true");
        }
        hiveTable.setParameters(properties);
    }
    if (isView) {
        // TODO: [FLINK-12398] Support partitioned view in catalog API
        hiveTable.setPartitionKeys(new ArrayList<>());
        CatalogView view = (CatalogView) table;
        hiveTable.setViewOriginalText(view.getOriginalQuery());
        hiveTable.setViewExpandedText(view.getExpandedQuery());
        hiveTable.setTableType(TableType.VIRTUAL_VIEW.name());
    }
    return hiveTable;
}
Also used : CatalogTable(org.apache.flink.table.catalog.CatalogTable) SqlAlterHiveTable(org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveTable) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) DescriptorProperties(org.apache.flink.table.descriptors.DescriptorProperties) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) CatalogTable(org.apache.flink.table.catalog.CatalogTable) UniqueConstraint(org.apache.flink.table.api.constraints.UniqueConstraint) CatalogView(org.apache.flink.table.catalog.CatalogView)
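
The column handling assumes that in the Flink schema the partition columns come last, so the single list of Hive FieldSchema objects can be sliced into regular columns and partition keys. A minimal sketch of that slicing in isolation, using plain strings instead of FieldSchema (class name and values are illustrative only):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PartitionColumnSplitSketch {
    public static void main(String[] args) {
        // columns in declaration order, with the partition keys at the end,
        // which is what the subList arithmetic above relies on
        List<String> allColumns = new ArrayList<>(Arrays.asList("i", "name", "date"));
        int partitionKeySize = 1; // e.g. the table is partitioned by "date"

        List<String> regularColumns = allColumns.subList(0, allColumns.size() - partitionKeySize);
        List<String> partitionColumns =
                allColumns.subList(allColumns.size() - partitionKeySize, allColumns.size());

        System.out.println("sd.setCols -> " + regularColumns);         // [i, name]
        System.out.println("setPartitionKeys -> " + partitionColumns); // [date]
    }
}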

Example 3 with CatalogTable

Use of org.apache.flink.table.catalog.CatalogTable in project flink by apache.

From the class HivePartitionFetcherTest, method testIgnoreNonExistPartition:

@Test
public void testIgnoreNonExistPartition() throws Exception {
    // it's possible a partition path exists but the partition is not added to HMS, e.g. the
    // partition is still being loaded, or the path is simply misplaced
    // make sure the fetcher can ignore such paths
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create test table
    String[] fieldNames = new String[] { "i", "date" };
    DataType[] fieldTypes = new DataType[] { DataTypes.INT(), DataTypes.STRING() };
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // add a valid partition path
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) Path(org.apache.hadoop.fs.Path) HiveCatalog(org.apache.flink.table.catalog.hive.HiveCatalog) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Table(org.apache.hadoop.hive.metastore.api.Table) JobConfWrapper(org.apache.flink.connectors.hive.JobConfWrapper) TableSchema(org.apache.flink.table.api.TableSchema) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) CatalogTable(org.apache.flink.table.catalog.CatalogTable) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) FileSystem(org.apache.hadoop.fs.FileSystem) DataType(org.apache.flink.table.types.DataType) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)
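
The test only creates the directory with fs.mkdirs and never registers the partition in the metastore, which is why the fetcher returns an empty list. For contrast, here is a sketch of registering the same partition through the catalog API, assuming the standard Catalog.createPartition signature; once registered, the fetcher would be expected to return it.

import java.util.Collections;
import java.util.HashMap;

import org.apache.flink.table.catalog.CatalogPartitionImpl;
import org.apache.flink.table.catalog.CatalogPartitionSpec;
import org.apache.flink.table.catalog.ObjectPath;
import org.apache.flink.table.catalog.hive.HiveCatalog;

public class RegisterPartitionSketch {
    // Adds the partition date=2021-06-18 to the metastore, unlike the bare
    // fs.mkdirs call in the test, which only creates the directory.
    static void registerPartition(HiveCatalog hiveCatalog, ObjectPath tablePath) throws Exception {
        CatalogPartitionSpec spec =
                new CatalogPartitionSpec(Collections.singletonMap("date", "2021-06-18"));
        hiveCatalog.createPartition(
                tablePath, spec, new CatalogPartitionImpl(new HashMap<>(), null), false);
    }
}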

Example 4 with CatalogTable

Use of org.apache.flink.table.catalog.CatalogTable in project flink by apache.

From the class HiveCatalogDataTypeTest, method verifyDataTypes:

private void verifyDataTypes(DataType[] types) throws Exception {
    CatalogTable table = createCatalogTable(types);
    catalog.createDatabase(db1, createDb(), false);
    catalog.createTable(path1, table, false);
    assertEquals(table.getSchema(), catalog.getTable(path1).getSchema());
}
Also used : CatalogTable(org.apache.flink.table.catalog.CatalogTable)
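
The helper createCatalogTable(types) is not shown here. One plausible shape for such a helper, purely as an illustration (column names, options, and comment are invented and may differ from the real test code):

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.CatalogTableImpl;
import org.apache.flink.table.types.DataType;

public class CatalogTableFromTypesSketch {
    // Builds a non-partitioned CatalogTable with one column per given type.
    static CatalogTable createCatalogTable(DataType[] types) {
        String[] colNames = new String[types.length];
        for (int i = 0; i < types.length; i++) {
            colNames[i] = "c" + i; // hypothetical column names
        }
        TableSchema schema = TableSchema.builder().fields(colNames, types).build();
        Map<String, String> options = new HashMap<>();
        options.put("connector", "hive"); // hypothetical: store it as a Hive table, as in Example 3
        return new CatalogTableImpl(schema, Collections.emptyList(), options, "test comment");
    }
}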

Example 5 with CatalogTable

Use of org.apache.flink.table.catalog.CatalogTable in project flink by apache.

From the class HiveCatalogDataTypeTest, method testNonSupportedBinaryDataTypes:

@Test
public void testNonSupportedBinaryDataTypes() throws Exception {
    DataType[] types = new DataType[] { DataTypes.BINARY(BinaryType.MAX_LENGTH) };
    CatalogTable table = createCatalogTable(types);
    catalog.createDatabase(db1, createDb(), false);
    exception.expect(UnsupportedOperationException.class);
    catalog.createTable(path1, table, false);
}
Also used : DataType(org.apache.flink.table.types.DataType) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Test(org.junit.Test)
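
The ExpectedException rule used above has an equivalent in org.junit.Assert.assertThrows; a sketch of the same expectation written that way, assuming JUnit 4.13+ is on the classpath:

// requires: import static org.junit.Assert.assertThrows;
@Test
public void testNonSupportedBinaryDataTypesWithAssertThrows() throws Exception {
    DataType[] types = new DataType[] { DataTypes.BINARY(BinaryType.MAX_LENGTH) };
    CatalogTable table = createCatalogTable(types);
    catalog.createDatabase(db1, createDb(), false);
    assertThrows(
            UnsupportedOperationException.class,
            () -> catalog.createTable(path1, table, false));
}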

Aggregations

CatalogTable (org.apache.flink.table.catalog.CatalogTable)68
Test (org.junit.Test)35
HashMap (java.util.HashMap)30
CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl)24
TableSchema (org.apache.flink.table.api.TableSchema)17
ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier)17
CreateTableOperation (org.apache.flink.table.operations.ddl.CreateTableOperation)14
ValidationException (org.apache.flink.table.api.ValidationException)13
ObjectPath (org.apache.flink.table.catalog.ObjectPath)13
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable)12
Operation (org.apache.flink.table.operations.Operation)12
AlterTableAddConstraintOperation (org.apache.flink.table.operations.ddl.AlterTableAddConstraintOperation)12
AlterTableDropConstraintOperation (org.apache.flink.table.operations.ddl.AlterTableDropConstraintOperation)12
AlterTableOptionsOperation (org.apache.flink.table.operations.ddl.AlterTableOptionsOperation)12
AlterTableRenameOperation (org.apache.flink.table.operations.ddl.AlterTableRenameOperation)12
ExplainOperation (org.apache.flink.table.operations.ExplainOperation)11
LoadModuleOperation (org.apache.flink.table.operations.LoadModuleOperation)11
QueryOperation (org.apache.flink.table.operations.QueryOperation)11
ShowFunctionsOperation (org.apache.flink.table.operations.ShowFunctionsOperation)11
ShowModulesOperation (org.apache.flink.table.operations.ShowModulesOperation)11