
Example 1 with ObjectIdentifier

use of org.apache.flink.table.catalog.ObjectIdentifier in project flink by apache.

the class HiveParserDMLHelper method createInsertOperationInfo.

public Tuple4<ObjectIdentifier, QueryOperation, Map<String, String>, Boolean> createInsertOperationInfo(
        RelNode queryRelNode,
        Table destTable,
        Map<String, String> staticPartSpec,
        List<String> destSchema,
        boolean overwrite) throws SemanticException {
    // sanity check
    Preconditions.checkArgument(
            queryRelNode instanceof Project
                    || queryRelNode instanceof Sort
                    || queryRelNode instanceof LogicalDistribution,
            "Expect top RelNode to be Project, Sort, or LogicalDistribution, actually got " + queryRelNode);
    if (!(queryRelNode instanceof Project)) {
        RelNode parent = ((SingleRel) queryRelNode).getInput();
        // SEL + SORT or SEL + DIST + LIMIT
        Preconditions.checkArgument(
                parent instanceof Project || parent instanceof LogicalDistribution,
                "Expect input to be a Project or LogicalDistribution, actually got " + parent);
        if (parent instanceof LogicalDistribution) {
            RelNode grandParent = ((LogicalDistribution) parent).getInput();
            Preconditions.checkArgument(
                    grandParent instanceof Project,
                    "Expect input of LogicalDistribution to be a Project, actually got " + grandParent);
        }
    }
    // handle dest schema, e.g. insert into dest(.,.,.) select ...
    queryRelNode = handleDestSchema((SingleRel) queryRelNode, destTable, destSchema, staticPartSpec.keySet());
    // track each target col and its expected type
    RelDataTypeFactory typeFactory = plannerContext.getTypeFactory();
    LinkedHashMap<String, RelDataType> targetColToCalcType = new LinkedHashMap<>();
    List<TypeInfo> targetHiveTypes = new ArrayList<>();
    List<FieldSchema> allCols = new ArrayList<>(destTable.getCols());
    allCols.addAll(destTable.getPartCols());
    for (FieldSchema col : allCols) {
        TypeInfo hiveType = TypeInfoUtils.getTypeInfoFromTypeString(col.getType());
        targetHiveTypes.add(hiveType);
        targetColToCalcType.put(col.getName(), HiveParserTypeConverter.convert(hiveType, typeFactory));
    }
    // add static partitions to query source
    if (!staticPartSpec.isEmpty()) {
        if (queryRelNode instanceof Project) {
            queryRelNode = replaceProjectForStaticPart((Project) queryRelNode, staticPartSpec, destTable, targetColToCalcType);
        } else if (queryRelNode instanceof Sort) {
            Sort sort = (Sort) queryRelNode;
            RelNode oldInput = sort.getInput();
            RelNode newInput;
            if (oldInput instanceof LogicalDistribution) {
                newInput = replaceDistForStaticParts((LogicalDistribution) oldInput, destTable, staticPartSpec, targetColToCalcType);
            } else {
                newInput = replaceProjectForStaticPart((Project) oldInput, staticPartSpec, destTable, targetColToCalcType);
                // we may need to shift the field collations
                final int numDynmPart = destTable.getTTable().getPartitionKeys().size() - staticPartSpec.size();
                if (!sort.getCollation().getFieldCollations().isEmpty() && numDynmPart > 0) {
                    sort.replaceInput(0, null);
                    sort = LogicalSort.create(
                            newInput,
                            shiftRelCollation(sort.getCollation(), (Project) oldInput, staticPartSpec.size(), numDynmPart),
                            sort.offset,
                            sort.fetch);
                }
            }
            sort.replaceInput(0, newInput);
            queryRelNode = sort;
        } else {
            queryRelNode = replaceDistForStaticParts((LogicalDistribution) queryRelNode, destTable, staticPartSpec, targetColToCalcType);
        }
    }
    // add type conversions
    queryRelNode = addTypeConversions(
            plannerContext.getCluster().getRexBuilder(),
            queryRelNode,
            new ArrayList<>(targetColToCalcType.values()),
            targetHiveTypes,
            funcConverter);
    // create identifier
    List<String> targetTablePath = Arrays.asList(destTable.getDbName(), destTable.getTableName());
    UnresolvedIdentifier unresolvedIdentifier = UnresolvedIdentifier.of(targetTablePath);
    ObjectIdentifier identifier = catalogManager.qualifyIdentifier(unresolvedIdentifier);
    return Tuple4.of(identifier, new PlannerQueryOperation(queryRelNode), staticPartSpec, overwrite);
}
Also used : PlannerQueryOperation(org.apache.flink.table.planner.operations.PlannerQueryOperation) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) UnresolvedIdentifier(org.apache.flink.table.catalog.UnresolvedIdentifier) RelDataType(org.apache.calcite.rel.type.RelDataType) SingleRel(org.apache.calcite.rel.SingleRel) LogicalDistribution(org.apache.flink.table.planner.plan.nodes.hive.LogicalDistribution) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) LinkedHashMap(java.util.LinkedHashMap) Project(org.apache.calcite.rel.core.Project) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) RelNode(org.apache.calcite.rel.RelNode) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) LogicalSort(org.apache.calcite.rel.logical.LogicalSort) Sort(org.apache.calcite.rel.core.Sort) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier)
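
The step most relevant to this page's topic sits at the end of the method: a partial table path (database plus table name) is wrapped in an UnresolvedIdentifier and then qualified by the CatalogManager into a fully specified ObjectIdentifier. A minimal sketch of that qualification, assuming a CatalogManager whose current catalog is registered as "myhive" (the catalog and table names here are hypothetical):

import org.apache.flink.table.catalog.ObjectIdentifier;
import org.apache.flink.table.catalog.UnresolvedIdentifier;

// a one- or two-part identifier leaves the remaining parts unresolved
UnresolvedIdentifier unresolved = UnresolvedIdentifier.of("default", "dest_table");
// qualifyIdentifier fills in the missing parts from the session's current catalog/database
ObjectIdentifier identifier = catalogManager.qualifyIdentifier(unresolved);
// identifier now names myhive.default.dest_table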

Example 2 with ObjectIdentifier

use of org.apache.flink.table.catalog.ObjectIdentifier in project flink by apache.

the class HiveLookupJoinITCase method getLookupFunction.

private FileSystemLookupFunction<HiveTablePartition> getLookupFunction(String tableName) throws Exception {
    TableEnvironmentInternal tableEnvInternal = (TableEnvironmentInternal) tableEnv;
    ObjectIdentifier tableIdentifier = ObjectIdentifier.of(hiveCatalog.getName(), "default", tableName);
    CatalogTable catalogTable = (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
    HiveLookupTableSource hiveTableSource =
            (HiveLookupTableSource)
                    FactoryUtil.createDynamicTableSource(
                            (DynamicTableSourceFactory) hiveCatalog.getFactory().orElseThrow(IllegalStateException::new),
                            tableIdentifier,
                            tableEnvInternal.getCatalogManager().resolveCatalogTable(catalogTable),
                            tableEnv.getConfig().getConfiguration(),
                            Thread.currentThread().getContextClassLoader(),
                            false);
    FileSystemLookupFunction<HiveTablePartition> lookupFunction = (FileSystemLookupFunction<HiveTablePartition>) hiveTableSource.getLookupFunction(new int[][] { { 0 } });
    return lookupFunction;
}
Also used : DynamicTableSourceFactory(org.apache.flink.table.factories.DynamicTableSourceFactory) TableEnvironmentInternal(org.apache.flink.table.api.internal.TableEnvironmentInternal) CatalogTable(org.apache.flink.table.catalog.CatalogTable) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier)
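
ObjectIdentifier.of builds a fully qualified three-part identifier directly, and toObjectPath() strips off the catalog part for APIs such as Catalog#getTable that only take a database and object name. A minimal sketch, assuming a catalog registered under the name "myhive" (names are hypothetical):

import org.apache.flink.table.catalog.ObjectIdentifier;
import org.apache.flink.table.catalog.ObjectPath;

ObjectIdentifier id = ObjectIdentifier.of("myhive", "default", "bounded_table");
ObjectPath path = id.toObjectPath(); // drops the catalog part
String db = path.getDatabaseName();  // "default"
String name = path.getObjectName();  // "bounded_table"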

Example 3 with ObjectIdentifier

use of org.apache.flink.table.catalog.ObjectIdentifier in project flink by apache.

the class HiveLookupJoinITCase method testPartitionFetcherAndReader.

@Test
public void testPartitionFetcherAndReader() throws Exception {
    // constructs test data using dynamic partition
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql("insert overwrite partition_table values " + "(1,'a',08,2019,'08','01')," + "(1,'a',10,2020,'08','31')," + "(2,'a',21,2020,'08','31')," + "(2,'b',22,2020,'08','31')," + "(3,'c',33,2020,'09','31')").await();
    FileSystemLookupFunction<HiveTablePartition> lookupFunction = getLookupFunction("partition_table");
    lookupFunction.open(null);
    PartitionFetcher<HiveTablePartition> fetcher = lookupFunction.getPartitionFetcher();
    PartitionFetcher.Context<HiveTablePartition> context = lookupFunction.getFetcherContext();
    List<HiveTablePartition> partitions = fetcher.fetch(context);
    // fetch latest partition by partition-name
    assertEquals(1, partitions.size());
    PartitionReader<HiveTablePartition, RowData> reader = lookupFunction.getPartitionReader();
    reader.open(partitions);
    List<RowData> res = new ArrayList<>();
    ObjectIdentifier tableIdentifier = ObjectIdentifier.of(hiveCatalog.getName(), "default", "partition_table");
    CatalogTable catalogTable = (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
    GenericRowData reuse = new GenericRowData(catalogTable.getSchema().getFieldCount());
    TypeSerializer<RowData> serializer = InternalSerializers.create(catalogTable.getSchema().toRowDataType().getLogicalType());
    RowData row;
    while ((row = reader.read(reuse)) != null) {
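        // the reader may return the shared 'reuse' instance, so copy each row before buffering it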
        res.add(serializer.copy(row));
    }
    res.sort(Comparator.comparingInt(o -> o.getInt(0)));
    assertEquals("[+I(3,c,33,2020,09,31)]", res.toString());
}
Also used : PartitionReader(org.apache.flink.connector.file.table.PartitionReader) PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN) Arrays(java.util.Arrays) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) BeforeClass(org.junit.BeforeClass) PARTITION_TIME_EXTRACTOR_KIND(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_KIND) CatalogTable(org.apache.flink.table.catalog.CatalogTable) STREAMING_SOURCE_PARTITION_INCLUDE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_INCLUDE) HiveCatalog(org.apache.flink.table.catalog.hive.HiveCatalog) ArrayList(java.util.ArrayList) GenericRowData(org.apache.flink.table.data.GenericRowData) InternalSerializers(org.apache.flink.table.runtime.typeutils.InternalSerializers) Duration(java.time.Duration) DynamicTableSourceFactory(org.apache.flink.table.factories.DynamicTableSourceFactory) STREAMING_SOURCE_PARTITION_ORDER(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_ORDER) TableEnvironment(org.apache.flink.table.api.TableEnvironment) AfterClass(org.junit.AfterClass) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) RowData(org.apache.flink.table.data.RowData) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) TestValuesTableFactory(org.apache.flink.table.planner.factories.TestValuesTableFactory) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) TableImpl(org.apache.flink.table.api.internal.TableImpl) TestCollectionTableFactory(org.apache.flink.table.planner.factories.utils.TestCollectionTableFactory) HiveTestUtils(org.apache.flink.table.catalog.hive.HiveTestUtils) STREAMING_SOURCE_ENABLE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_ENABLE) List(java.util.List) FactoryUtil(org.apache.flink.table.factories.FactoryUtil) SqlDialect(org.apache.flink.table.api.SqlDialect) EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) Row(org.apache.flink.types.Row) Comparator(java.util.Comparator) STREAMING_SOURCE_MONITOR_INTERVAL(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_MONITOR_INTERVAL) TableEnvironmentInternal(org.apache.flink.table.api.internal.TableEnvironmentInternal) Assert.assertEquals(org.junit.Assert.assertEquals) ArrayList(java.util.ArrayList) TableEnvironment(org.apache.flink.table.api.TableEnvironment) CatalogTable(org.apache.flink.table.catalog.CatalogTable) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) GenericRowData(org.apache.flink.table.data.GenericRowData) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) Test(org.junit.Test)
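
The fetch-then-read protocol the test exercises is worth isolating: a PartitionFetcher decides which partitions to load (here only the latest one by partition name), and a PartitionReader then streams rows out of them into a reusable buffer. A minimal sketch of the protocol, assuming fetcher, context, reader, and fieldCount come from a FileSystemLookupFunction and its table schema as in the test above; process(...) is a hypothetical consumer:

List<HiveTablePartition> partitions = fetcher.fetch(context); // decide what to load
reader.open(partitions);                                      // then read rows from it
GenericRowData reuse = new GenericRowData(fieldCount);        // fieldCount: number of columns in the table schema
RowData row;
while ((row = reader.read(reuse)) != null) {
    process(row); // hypothetical consumer; copy the row first if it must outlive the loop
}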

Example 4 with ObjectIdentifier

use of org.apache.flink.table.catalog.ObjectIdentifier in project flink by apache.

the class HiveCatalogITCase method testCreateAndGetManagedTable.

@Test
public void testCreateAndGetManagedTable() throws Exception {
    TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
    String catalog = "myhive";
    String database = "default";
    String table = "managed_table";
    ObjectIdentifier tableIdentifier = ObjectIdentifier.of(catalog, database, table);
    try {
        TestManagedTableFactory.MANAGED_TABLES.put(tableIdentifier, new AtomicReference<>());
        tableEnv.registerCatalog(catalog, hiveCatalog);
        tableEnv.useCatalog(catalog);
        final String sql =
                String.format(
                        "CREATE TABLE %s (\n"
                                + "  uuid varchar(40) not null,\n"
                                + "  price DECIMAL(10, 2),\n"
                                + "  currency STRING,\n"
                                + "  ts6 TIMESTAMP(6),\n"
                                + "  ts AS CAST(ts6 AS TIMESTAMP(3)),\n"
                                + "  WATERMARK FOR ts AS ts,\n"
                                + "  constraint ct1 PRIMARY KEY(uuid) NOT ENFORCED)\n",
                        table);
        tableEnv.executeSql(sql);
        Map<String, String> expectedOptions = new HashMap<>();
        expectedOptions.put(TestManagedTableFactory.ENRICHED_KEY, TestManagedTableFactory.ENRICHED_VALUE);
        assertThat(TestManagedTableFactory.MANAGED_TABLES.get(tableIdentifier).get()).containsExactlyInAnyOrderEntriesOf(expectedOptions);
        Map<String, String> expectedParameters = new HashMap<>();
        expectedOptions.forEach((k, v) -> expectedParameters.put(FLINK_PROPERTY_PREFIX + k, v));
        expectedParameters.put(FLINK_PROPERTY_PREFIX + CONNECTOR.key(), ManagedTableFactory.DEFAULT_IDENTIFIER);
        assertThat(hiveCatalog.getHiveTable(tableIdentifier.toObjectPath()).getParameters()).containsAllEntriesOf(expectedParameters);
        assertThat(hiveCatalog.getTable(tableIdentifier.toObjectPath()).getOptions())
                .containsExactlyEntriesOf(
                        Collections.singletonMap(
                                TestManagedTableFactory.ENRICHED_KEY, TestManagedTableFactory.ENRICHED_VALUE));
    } finally {
        tableEnv.executeSql(String.format("DROP TABLE %s", table));
        assertThat(TestManagedTableFactory.MANAGED_TABLES.get(tableIdentifier).get()).isNull();
    }
}
Also used : HashMap(java.util.HashMap) TableEnvironment(org.apache.flink.table.api.TableEnvironment) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) Test(org.junit.Test)
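
Because ObjectIdentifier implements equals and hashCode over its three parts, it works as a stable map key; that is what lets the test register the table in TestManagedTableFactory.MANAGED_TABLES up front and look it up again after the CREATE TABLE runs. A minimal sketch of that property, with hypothetical names and values:

import java.util.HashMap;
import java.util.Map;
import org.apache.flink.table.catalog.ObjectIdentifier;

Map<ObjectIdentifier, String> owners = new HashMap<>();
owners.put(ObjectIdentifier.of("myhive", "default", "managed_table"), "team-a");
// a separately constructed identifier with the same three parts finds the entry
String owner = owners.get(ObjectIdentifier.of("myhive", "default", "managed_table")); // "team-a"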

Example 5 with ObjectIdentifier

use of org.apache.flink.table.catalog.ObjectIdentifier in project flink by apache.

the class HiveParserDDLSemanticAnalyzer method convertAlterTableProps.

private Operation convertAlterTableProps(CatalogBaseTable oldBaseTable, String tableName, Map<String, String> partSpec, Map<String, String> newProps) {
    ObjectIdentifier tableIdentifier = parseObjectIdentifier(tableName);
    CatalogTable oldTable = (CatalogTable) oldBaseTable;
    CatalogPartitionSpec catalogPartitionSpec = partSpec != null ? new CatalogPartitionSpec(partSpec) : null;
    CatalogPartition catalogPartition = partSpec != null ? getPartition(tableIdentifier, catalogPartitionSpec) : null;
    Map<String, String> props = new HashMap<>();
    if (catalogPartition != null) {
        props.putAll(catalogPartition.getProperties());
        props.putAll(newProps);
        return new AlterPartitionPropertiesOperation(tableIdentifier, catalogPartitionSpec, new CatalogPartitionImpl(props, catalogPartition.getComment()));
    } else {
        props.putAll(oldTable.getOptions());
        props.putAll(newProps);
        return new AlterTableOptionsOperation(tableIdentifier, oldTable.copy(props));
    }
}
Also used : AlterTableOptionsOperation(org.apache.flink.table.operations.ddl.AlterTableOptionsOperation) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) CatalogPartition(org.apache.flink.table.catalog.CatalogPartition) AlterPartitionPropertiesOperation(org.apache.flink.table.operations.ddl.AlterPartitionPropertiesOperation) CatalogTable(org.apache.flink.table.catalog.CatalogTable) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) CatalogPartitionImpl(org.apache.flink.table.catalog.CatalogPartitionImpl)
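
Here the identifier pairs with a CatalogPartitionSpec to address a single partition of the table. A minimal sketch of building such a pair, assuming a table partitioned by dt and hr (the partition columns and values are hypothetical):

import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.flink.table.catalog.CatalogPartitionSpec;
import org.apache.flink.table.catalog.ObjectIdentifier;

ObjectIdentifier tableId = ObjectIdentifier.of("myhive", "default", "some_table");
Map<String, String> partSpec = new LinkedHashMap<>();
partSpec.put("dt", "2020-08-31");
partSpec.put("hr", "08");
CatalogPartitionSpec spec = new CatalogPartitionSpec(partSpec);
// (tableId, spec) together address one partition, as in the ALTER operation above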

Aggregations

ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier): 60 uses
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 21 uses
ValidationException (org.apache.flink.table.api.ValidationException): 20 uses
UnresolvedIdentifier (org.apache.flink.table.catalog.UnresolvedIdentifier): 20 uses
HashMap (java.util.HashMap): 19 uses
LinkedHashMap (java.util.LinkedHashMap): 16 uses
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 14 uses
ContextResolvedTable (org.apache.flink.table.catalog.ContextResolvedTable): 13 uses
ArrayList (java.util.ArrayList): 10 uses
ResolvedCatalogTable (org.apache.flink.table.catalog.ResolvedCatalogTable): 10 uses
Map (java.util.Map): 9 uses
UniqueConstraint (org.apache.flink.table.api.constraints.UniqueConstraint): 9 uses
CatalogPartitionSpec (org.apache.flink.table.catalog.CatalogPartitionSpec): 8 uses
NotNullConstraint (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.NotNullConstraint): 8 uses
TableSchema (org.apache.flink.table.api.TableSchema): 7 uses
CatalogView (org.apache.flink.table.catalog.CatalogView): 7 uses
QueryOperation (org.apache.flink.table.operations.QueryOperation): 6 uses
TableException (org.apache.flink.table.api.TableException): 5 uses
HiveParserASTNode (org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode): 5 uses
List (java.util.List): 4 uses
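
As a closing note, ObjectIdentifier also offers two string renderings that show up throughout these call sites: a plain dotted form and an escaped form intended for use in SQL text. A minimal sketch (names hypothetical):

ObjectIdentifier id = ObjectIdentifier.of("myhive", "default", "t1");
String summary = id.asSummaryString();           // myhive.default.t1
String serializable = id.asSerializableString(); // `myhive`.`default`.`t1`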