Search in sources :

Example 26 with CatalogPartitionSpec

use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

The method scanFileEntries of the class CompactManagedTableITCase.

/**
 * Walks every regular file below {@code rootPath} and rebuilds the per-partition bookkeeping.
 *
 * <p>For each file the partition spec is derived from its path; the file is recorded under that
 * spec, and the rows read from it are appended to {@code collectedElements}. The resulting file
 * map is published through {@code referenceOfManagedTableFileEntries}.
 *
 * @throws IOException declared by the directory walk
 */
private void scanFileEntries() throws IOException {
    Map<CatalogPartitionSpec, List<Path>> entriesByPartition = new HashMap<>();
    try (Stream<java.nio.file.Path> walker = Files.walk(Paths.get(rootPath.getPath()))) {
        walker.filter(Files::isRegularFile).forEach(localFile -> {
            Path flinkPath = new Path(localFile.toString());
            // A file of a non-partitioned table yields an empty partition map here.
            CatalogPartitionSpec spec = new CatalogPartitionSpec(PartitionPathUtils.extractPartitionSpecFromPath(flinkPath));
            entriesByPartition.computeIfAbsent(spec, ignored -> new ArrayList<>()).add(flinkPath);
            // Read first, then register: a failing read leaves collectedElements untouched.
            List<RowData> rows = readElementsFromFile(localFile.toFile());
            collectedElements.computeIfAbsent(spec, ignored -> new ArrayList<>()).addAll(rows);
        });
    }
    referenceOfManagedTableFileEntries.set(entriesByPartition);
}
Also used : Path(org.apache.flink.core.fs.Path) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec)

Example 27 with CatalogPartitionSpec

use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

The method checkFileAndElements of the class CompactManagedTableITCase.

/**
 * Verifies the previously scanned file entries partition by partition.
 *
 * <p>A partition contained in the given set must hold exactly one file whose name starts with
 * {@code compact-} and whose rows match the collected rows of that partition. Every other
 * partition must only hold files whose names start with {@code part-} and whose rows are all
 * among the collected rows.
 *
 * @param resolvedPartitionSpecsHaveBeenOrToBeCompacted partitions expected to be compacted
 * @return for each compacted partition, the value of {@code getLastModifiedTime} for its file
 */
private Map<CatalogPartitionSpec, Long> checkFileAndElements(Set<CatalogPartitionSpec> resolvedPartitionSpecsHaveBeenOrToBeCompacted) {
    Map<CatalogPartitionSpec, Long> compactedFileTimestamps = new HashMap<>();
    Map<CatalogPartitionSpec, List<Path>> scannedEntries = referenceOfManagedTableFileEntries.get();
    for (Map.Entry<CatalogPartitionSpec, List<Path>> scanned : scannedEntries.entrySet()) {
        CatalogPartitionSpec spec = scanned.getKey();
        List<Path> files = scanned.getValue();
        if (!resolvedPartitionSpecsHaveBeenOrToBeCompacted.contains(spec)) {
            // Partitions outside the compaction scope must be left untouched.
            for (Path untouched : files) {
                assertThat(untouched.getName()).startsWith("part-");
                List<RowData> rows = readElementsFromFile(new File(untouched.getPath()));
                assertThat(collectedElements.get(spec)).containsAll(rows);
            }
            continue;
        }
        // A compacted partition is expected to consist of a single compact file.
        assertThat(files).hasSize(1);
        Path compacted = files.get(0);
        assertThat(compacted.getName()).startsWith("compact-");
        List<RowData> compactedRows = readElementsFromFile(new File(compacted.getPath()));
        assertThat(compactedRows).hasSameElementsAs(collectedElements.get(spec));
        compactedFileTimestamps.put(spec, getLastModifiedTime(compacted));
    }
    return compactedFileTimestamps;
}
Also used : Path(org.apache.flink.core.fs.Path) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec)

Example 28 with CatalogPartitionSpec

use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

The method commitDelete of the class TestManagedSinkCommitter.

/**
 * Deletes the given files and then removes them from the tracked entries of their partition.
 *
 * @param toDelete paths to delete, grouped by partition
 * @param managedTableFileEntries tracked file entries per partition; updated in place
 * @throws IOException propagated from the file system calls
 */
private void commitDelete(Map<CatalogPartitionSpec, Set<Path>> toDelete, Map<CatalogPartitionSpec, List<Path>> managedTableFileEntries) throws IOException {
    for (Map.Entry<CatalogPartitionSpec, Set<Path>> deletion : toDelete.entrySet()) {
        Set<Path> obsoleteFiles = deletion.getValue();
        // Delete the files first; the bookkeeping below only runs if every delete call returned.
        for (Path obsolete : obsoleteFiles) {
            obsolete.getFileSystem().delete(obsolete, false);
        }
        CatalogPartitionSpec spec = deletion.getKey();
        // NOTE(review): assumes every partition in toDelete is already tracked — confirm with caller.
        List<Path> tracked = managedTableFileEntries.get(spec);
        tracked.removeAll(obsoleteFiles);
        managedTableFileEntries.put(spec, tracked);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) Set(java.util.Set) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec)

Example 29 with CatalogPartitionSpec

use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

The method convertAlterTable of the class SqlToOperationConverter.

/**
 * Converts an {@code ALTER TABLE} statement into the matching {@link Operation}.
 *
 * <p>The target must resolve to an existing, non-temporary table that is not a view. The
 * concrete {@link SqlAlterTable} subtype then selects which operation is built; an unhandled
 * subtype is rejected with a {@link ValidationException}.
 */
private Operation convertAlterTable(SqlAlterTable sqlAlterTable) {
    ObjectIdentifier tableIdentifier = catalogManager.qualifyIdentifier(UnresolvedIdentifier.of(sqlAlterTable.fullTableName()));
    Optional<ContextResolvedTable> optionalCatalogTable = catalogManager.getTable(tableIdentifier);
    ContextResolvedTable resolvedTable = optionalCatalogTable.orElse(null);
    if (resolvedTable == null || resolvedTable.isTemporary()) {
        throw new ValidationException(String.format("Table %s doesn't exist or is a temporary table.", tableIdentifier));
    }
    CatalogBaseTable baseTable = resolvedTable.getTable();
    if (baseTable instanceof CatalogView) {
        throw new ValidationException("ALTER TABLE for a view is not allowed");
    }

    // Every case below returns or throws, so the dispatch is written as independent guards.
    if (sqlAlterTable instanceof SqlAlterTableRename) {
        SqlAlterTableRename rename = (SqlAlterTableRename) sqlAlterTable;
        ObjectIdentifier newTableIdentifier = catalogManager.qualifyIdentifier(UnresolvedIdentifier.of(rename.fullNewTableName()));
        return new AlterTableRenameOperation(tableIdentifier, newTableIdentifier);
    }
    if (sqlAlterTable instanceof SqlAlterTableOptions) {
        return convertAlterTableOptions(tableIdentifier, (CatalogTable) baseTable, (SqlAlterTableOptions) sqlAlterTable);
    }
    if (sqlAlterTable instanceof SqlAlterTableReset) {
        return convertAlterTableReset(tableIdentifier, (CatalogTable) baseTable, (SqlAlterTableReset) sqlAlterTable);
    }
    if (sqlAlterTable instanceof SqlAlterTableAddConstraint) {
        SqlTableConstraint constraint = ((SqlAlterTableAddConstraint) sqlAlterTable).getConstraint();
        validateTableConstraint(constraint);
        TableSchema currentSchema = TableSchema.fromResolvedSchema(baseTable.getUnresolvedSchema().resolve(catalogManager.getSchemaResolver()));
        // Sanity check: the builder result is discarded, it is only built with the new key.
        TableSchema.Builder schemaBuilder = TableSchemaUtils.builderWithGivenSchema(currentSchema);
        if (constraint.getConstraintName().isPresent()) {
            schemaBuilder.primaryKey(constraint.getConstraintName().get(), constraint.getColumnNames());
        } else {
            schemaBuilder.primaryKey(constraint.getColumnNames());
        }
        schemaBuilder.build();
        return new AlterTableAddConstraintOperation(tableIdentifier, constraint.getConstraintName().orElse(null), constraint.getColumnNames());
    }
    if (sqlAlterTable instanceof SqlAlterTableDropConstraint) {
        SqlAlterTableDropConstraint dropConstraint = (SqlAlterTableDropConstraint) sqlAlterTable;
        String constraintName = dropConstraint.getConstraintName().getSimple();
        TableSchema currentSchema = TableSchema.fromResolvedSchema(baseTable.getUnresolvedSchema().resolve(catalogManager.getSchemaResolver()));
        // Only a primary key carrying exactly this name can be dropped.
        boolean primaryKeyMatches = currentSchema.getPrimaryKey().filter(pk -> pk.getName().equals(constraintName)).isPresent();
        if (!primaryKeyMatches) {
            throw new ValidationException(String.format("CONSTRAINT [%s] does not exist", constraintName));
        }
        return new AlterTableDropConstraintOperation(tableIdentifier, constraintName);
    }
    if (sqlAlterTable instanceof SqlAddReplaceColumns) {
        return OperationConverterUtils.convertAddReplaceColumns(tableIdentifier, (SqlAddReplaceColumns) sqlAlterTable, (CatalogTable) baseTable, flinkPlanner.getOrCreateSqlValidator());
    }
    if (sqlAlterTable instanceof SqlChangeColumn) {
        return OperationConverterUtils.convertChangeColumn(tableIdentifier, (SqlChangeColumn) sqlAlterTable, (CatalogTable) baseTable, flinkPlanner.getOrCreateSqlValidator());
    }
    if (sqlAlterTable instanceof SqlAddPartitions) {
        SqlAddPartitions addPartitions = (SqlAddPartitions) sqlAlterTable;
        List<CatalogPartitionSpec> partitionSpecs = new ArrayList<>();
        List<CatalogPartition> newPartitions = new ArrayList<>();
        // Specs and properties are parallel lists, hence the index-based walk.
        for (int idx = 0; idx < addPartitions.getPartSpecs().size(); idx++) {
            partitionSpecs.add(new CatalogPartitionSpec(addPartitions.getPartitionKVs(idx)));
            Map<String, String> partitionProps = OperationConverterUtils.extractProperties(addPartitions.getPartProps().get(idx));
            newPartitions.add(new CatalogPartitionImpl(partitionProps, null));
        }
        return new AddPartitionsOperation(tableIdentifier, addPartitions.ifNotExists(), partitionSpecs, newPartitions);
    }
    if (sqlAlterTable instanceof SqlDropPartitions) {
        SqlDropPartitions dropPartitions = (SqlDropPartitions) sqlAlterTable;
        List<CatalogPartitionSpec> partitionSpecs = new ArrayList<>();
        for (int idx = 0; idx < dropPartitions.getPartSpecs().size(); idx++) {
            partitionSpecs.add(new CatalogPartitionSpec(dropPartitions.getPartitionKVs(idx)));
        }
        return new DropPartitionsOperation(tableIdentifier, dropPartitions.ifExists(), partitionSpecs);
    }
    if (sqlAlterTable instanceof SqlAlterTableCompact) {
        return convertAlterTableCompact(tableIdentifier, resolvedTable, (SqlAlterTableCompact) sqlAlterTable);
    }
    throw new ValidationException(String.format("[%s] needs to implement", sqlAlterTable.toSqlString(CalciteSqlDialect.DEFAULT)));
}
Also used : ValidationException(org.apache.flink.table.api.ValidationException) TableSchema(org.apache.flink.table.api.TableSchema) SqlAlterTableReset(org.apache.flink.sql.parser.ddl.SqlAlterTableReset) ArrayList(java.util.ArrayList) AlterTableAddConstraintOperation(org.apache.flink.table.operations.ddl.AlterTableAddConstraintOperation) SqlAlterTableRename(org.apache.flink.sql.parser.ddl.SqlAlterTableRename) AlterTableDropConstraintOperation(org.apache.flink.table.operations.ddl.AlterTableDropConstraintOperation) SqlAddReplaceColumns(org.apache.flink.sql.parser.ddl.SqlAddReplaceColumns) SqlAddPartitions(org.apache.flink.sql.parser.ddl.SqlAddPartitions) SqlAlterTableAddConstraint(org.apache.flink.sql.parser.ddl.SqlAlterTableAddConstraint) SqlAlterTableCompact(org.apache.flink.sql.parser.ddl.SqlAlterTableCompact) ArrayList(java.util.ArrayList) List(java.util.List) SqlNodeList(org.apache.calcite.sql.SqlNodeList) SqlTableConstraint(org.apache.flink.sql.parser.ddl.constraint.SqlTableConstraint) CatalogView(org.apache.flink.table.catalog.CatalogView) AddPartitionsOperation(org.apache.flink.table.operations.ddl.AddPartitionsOperation) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) DropPartitionsOperation(org.apache.flink.table.operations.ddl.DropPartitionsOperation) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) UnresolvedIdentifier(org.apache.flink.table.catalog.UnresolvedIdentifier) CatalogTable(org.apache.flink.table.catalog.CatalogTable) ResolvedCatalogTable(org.apache.flink.table.catalog.ResolvedCatalogTable) SqlAlterTableAddConstraint(org.apache.flink.sql.parser.ddl.SqlAlterTableAddConstraint) SqlTableConstraint(org.apache.flink.sql.parser.ddl.constraint.SqlTableConstraint) RelHint(org.apache.calcite.rel.hint.RelHint) SqlAlterTableDropConstraint(org.apache.flink.sql.parser.ddl.SqlAlterTableDropConstraint) 
SqlChangeColumn(org.apache.flink.sql.parser.ddl.SqlChangeColumn) SqlAlterTableOptions(org.apache.flink.sql.parser.ddl.SqlAlterTableOptions) AlterTableRenameOperation(org.apache.flink.table.operations.ddl.AlterTableRenameOperation) SqlAlterTableDropConstraint(org.apache.flink.sql.parser.ddl.SqlAlterTableDropConstraint) ContextResolvedTable(org.apache.flink.table.catalog.ContextResolvedTable) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) CatalogPartitionImpl(org.apache.flink.table.catalog.CatalogPartitionImpl) SqlDropPartitions(org.apache.flink.sql.parser.ddl.SqlDropPartitions)

Example 30 with CatalogPartitionSpec

use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

The method convertAlterTableCompact of the class SqlToOperationConverter.

/**
 * Converts an {@code ALTER TABLE ... COMPACT} statement into a {@link ModifyOperation} for
 * Flink's managed table to trigger a compaction batch job.
 *
 * <p>Any partition key named in the statement must be one of the table's partition keys;
 * without a partition clause an empty partition spec is used.
 *
 * @throws ValidationException if the table is not a managed table, or if the statement names a
 *     partition column that is not among the table's partition keys
 */
private ModifyOperation convertAlterTableCompact(ObjectIdentifier tableIdentifier, ContextResolvedTable contextResolvedTable, SqlAlterTableCompact alterTableCompact) {
    Catalog catalog = catalogManager.getCatalog(tableIdentifier.getCatalogName()).orElse(null);
    ResolvedCatalogTable resolvedCatalogTable = contextResolvedTable.getResolvedTable();
    if (!ManagedTableListener.isManagedTable(catalog, resolvedCatalogTable)) {
        throw new ValidationException(String.format("ALTER TABLE COMPACT operation is not supported for non-managed table %s", tableIdentifier));
    }

    Map<String, String> partitionKVs = alterTableCompact.getPartitionKVs();
    // Default: no partition clause, i.e. an empty spec.
    CatalogPartitionSpec partitionSpec = new CatalogPartitionSpec(Collections.emptyMap());
    if (partitionKVs != null) {
        List<String> partitionKeys = resolvedCatalogTable.getPartitionKeys();
        Set<String> validPartitionKeySet = new HashSet<>(partitionKeys);
        // Hint appended to the error message raised for an unknown partition column.
        String exMsg;
        if (partitionKeys.isEmpty()) {
            exMsg = String.format("Table %s is not partitioned.", tableIdentifier);
        } else {
            exMsg = String.format("Available ordered partition columns: [%s]", partitionKeys.stream().collect(Collectors.joining("', '", "'", "'")));
        }
        for (String partitionKey : partitionKVs.keySet()) {
            if (!validPartitionKeySet.contains(partitionKey)) {
                throw new ValidationException(String.format("Partition column '%s' not defined in the table schema. %s", partitionKey, exMsg));
            }
        }
        partitionSpec = new CatalogPartitionSpec(partitionKVs);
    }

    Map<String, String> compactOptions = catalogManager.resolveCompactManagedTableOptions(resolvedCatalogTable, tableIdentifier, partitionSpec);
    // The same table acts as source and sink; both sides receive the resolved compact options.
    QueryOperation child = new SourceQueryOperation(contextResolvedTable, compactOptions);
    return new SinkModifyOperation(contextResolvedTable, child, partitionSpec.getPartitionSpec(), false, compactOptions);
}
Also used : ValidationException(org.apache.flink.table.api.ValidationException) ResolvedCatalogTable(org.apache.flink.table.catalog.ResolvedCatalogTable) SinkModifyOperation(org.apache.flink.table.operations.SinkModifyOperation) SourceQueryOperation(org.apache.flink.table.operations.SourceQueryOperation) SqlShowCurrentCatalog(org.apache.flink.sql.parser.dql.SqlShowCurrentCatalog) Catalog(org.apache.flink.table.catalog.Catalog) SqlUseCatalog(org.apache.flink.sql.parser.ddl.SqlUseCatalog) SqlDropCatalog(org.apache.flink.sql.parser.ddl.SqlDropCatalog) SqlCreateCatalog(org.apache.flink.sql.parser.ddl.SqlCreateCatalog) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) HashSet(java.util.HashSet) QueryOperation(org.apache.flink.table.operations.QueryOperation) SourceQueryOperation(org.apache.flink.table.operations.SourceQueryOperation)

Aggregations

CatalogPartitionSpec (org.apache.flink.table.catalog.CatalogPartitionSpec)32 HashMap (java.util.HashMap)20 LinkedHashMap (java.util.LinkedHashMap)15 ArrayList (java.util.ArrayList)11 Map (java.util.Map)11 ObjectPath (org.apache.flink.table.catalog.ObjectPath)11 List (java.util.List)10 CatalogTable (org.apache.flink.table.catalog.CatalogTable)10 Path (org.apache.flink.core.fs.Path)8 CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable)8 CatalogPartition (org.apache.flink.table.catalog.CatalogPartition)7 Test (org.junit.Test)7 HashSet (java.util.HashSet)6 SqlCreateHiveTable (org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable)6 CatalogPartitionImpl (org.apache.flink.table.catalog.CatalogPartitionImpl)6 ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier)6 ValidationException (org.apache.flink.table.api.ValidationException)5 RowData (org.apache.flink.table.data.RowData)5 Partition (org.apache.hadoop.hive.metastore.api.Partition)5 Set (java.util.Set)4