Use of org.apache.iceberg.spark.source.StagedSparkTable in the apache/iceberg project.
Class SparkCatalog, method stageCreate.
@Override
public StagedTable stageCreate(Identifier ident, StructType schema, Transform[] transforms,
    Map<String, String> properties) throws TableAlreadyExistsException {
  // convert the Spark schema to an Iceberg schema
  Schema icebergSchema = SparkSchemaUtil.convert(schema, useTimestampsWithoutZone);
  try {
    Catalog.TableBuilder builder = newBuilder(ident, icebergSchema);
    // start a create transaction; nothing is committed until the staged table's
    // commitStagedChanges() commits this transaction
    Transaction transaction = builder
        .withPartitionSpec(Spark3Util.toPartitionSpec(icebergSchema, transforms))
        .withLocation(properties.get("location"))
        .withProperties(Spark3Util.rebuildCreateProperties(properties))
        .createTransaction();
    return new StagedSparkTable(transaction);
  } catch (AlreadyExistsException e) {
    // translate the Iceberg exception into the Spark connector exception
    throw new TableAlreadyExistsException(ident);
  }
}
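For context, Spark drives this method during an atomic CREATE TABLE AS SELECT: it stages the table, writes the query output, and only then commits. Below is a minimal sketch of that contract, not taken from the Iceberg sources; the SparkCatalog setup is assumed, and the identifier, schema, and write step are hypothetical placeholders.

import java.util.Collections;
import org.apache.iceberg.spark.SparkCatalog;
import org.apache.spark.sql.connector.catalog.Identifier;
import org.apache.spark.sql.connector.catalog.StagedTable;
import org.apache.spark.sql.connector.expressions.Transform;
import org.apache.spark.sql.types.StructType;

class StagedCreateSketch {
  static void atomicCreate(SparkCatalog catalog) throws Exception {
    Identifier ident = Identifier.of(new String[] {"db"}, "events");  // hypothetical table
    StructType schema = new StructType().add("id", "long").add("data", "string");

    // stageCreate opens an Iceberg create transaction but commits nothing yet
    StagedTable staged =
        catalog.stageCreate(ident, schema, new Transform[0], Collections.emptyMap());
    try {
      // ... the query output would be written through the staged table here ...
      staged.commitStagedChanges();  // the new table becomes visible atomically
    } catch (Exception e) {
      staged.abortStagedChanges();   // on failure, no table is exposed
      throw e;
    }
  }
}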
Use of org.apache.iceberg.spark.source.StagedSparkTable in the apache/iceberg project.
Class SparkCatalog, method stageCreateOrReplace.
@Override
public StagedTable stageCreateOrReplace(Identifier ident, StructType schema, Transform[] transforms,
    Map<String, String> properties) {
  Schema icebergSchema = SparkSchemaUtil.convert(schema, useTimestampsWithoutZone);
  Catalog.TableBuilder builder = newBuilder(ident, icebergSchema);
  // a create-or-replace transaction replaces the existing table on commit, or
  // creates it if it does not exist, so no exception translation is needed here
  Transaction transaction = builder
      .withPartitionSpec(Spark3Util.toPartitionSpec(icebergSchema, transforms))
      .withLocation(properties.get("location"))
      .withProperties(Spark3Util.rebuildCreateProperties(properties))
      .createOrReplaceTransaction();
  return new StagedSparkTable(transaction);
}
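Spark routes CREATE OR REPLACE TABLE ... AS SELECT through this method when the catalog implements StagingTableCatalog, which makes the replacement atomic: readers see either the old table or the fully written new one. A one-line illustration, assuming an active SparkSession named spark and hypothetical catalog and table names:

spark.sql("CREATE OR REPLACE TABLE my_catalog.db.events AS SELECT * FROM source");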
Use of org.apache.iceberg.spark.source.StagedSparkTable in the apache/iceberg project.
Class BaseTableCreationSparkAction, method stageDestTable.
protected StagedSparkTable stageDestTable() {
  try {
    Map<String, String> props = destTableProps();
    StructType schema = sourceTable.schema();
    Transform[] partitioning = sourceTable.partitioning();
    return (StagedSparkTable) destCatalog().stageCreate(destTableIdent(), schema, partitioning, props);
  } catch (org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException e) {
    throw new NoSuchNamespaceException("Cannot create table %s as the namespace does not exist", destTableIdent());
  } catch (org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException e) {
    throw new AlreadyExistsException("Cannot create table %s as it already exists", destTableIdent());
  }
}
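The cast to StagedSparkTable is safe for an Iceberg destination catalog: as the SparkCatalog methods above show, both stageCreate and stageCreateOrReplace return a StagedSparkTable wrapping the pending transaction. The catch blocks translate Spark's checked analysis exceptions into Iceberg's unchecked equivalents.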
Use of org.apache.iceberg.spark.source.StagedSparkTable in the apache/iceberg project.
Class BaseSnapshotTableSparkAction, method doExecute.
private SnapshotTable.Result doExecute() {
  Preconditions.checkArgument(destCatalog() != null && destTableIdent() != null,
      "The destination catalog and identifier cannot be null. " +
      "Make sure to configure the action with a valid destination table identifier via the `as` method.");

  LOG.info("Staging a new Iceberg table {} as a snapshot of {}", destTableIdent(), sourceTableIdent());
  StagedSparkTable stagedTable = stageDestTable();
  Table icebergTable = stagedTable.table();
  // TODO: Check the dest table location does not overlap with the source table location

  boolean threw = true;
  try {
    LOG.info("Ensuring {} has a valid name mapping", destTableIdent());
    ensureNameMappingPresent(icebergTable);

    TableIdentifier v1TableIdent = v1SourceTable().identifier();
    String stagingLocation = getMetadataLocation(icebergTable);
    LOG.info("Generating Iceberg metadata for {} in {}", destTableIdent(), stagingLocation);
    SparkTableUtil.importSparkTable(spark(), v1TableIdent, icebergTable, stagingLocation);

    LOG.info("Committing staged changes to {}", destTableIdent());
    stagedTable.commitStagedChanges();
    threw = false;
  } finally {
    if (threw) {
      LOG.error("Error when populating the staged table with metadata, aborting changes");
      try {
        stagedTable.abortStagedChanges();
      } catch (Exception abortException) {
        LOG.error("Cannot abort staged changes", abortException);
      }
    }
  }

  Snapshot snapshot = icebergTable.currentSnapshot();
  long importedDataFilesCount = Long.parseLong(snapshot.summary().get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
  LOG.info("Successfully loaded Iceberg metadata for {} files to {}", importedDataFilesCount, destTableIdent());
  return new BaseSnapshotTableActionResult(importedDataFilesCount);
}
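For reference, this action is normally reached through the public SparkActions API rather than by calling doExecute directly. A minimal sketch, assuming an active SparkSession named spark; the table identifiers are hypothetical:

import org.apache.iceberg.actions.SnapshotTable;
import org.apache.iceberg.spark.actions.SparkActions;

SnapshotTable.Result result =
    SparkActions.get(spark)
        .snapshotTable("spark_catalog.db.source_table")   // source to snapshot
        .as("spark_catalog.db.source_table_snapshot")     // required destination
        .execute();
long imported = result.importedDataFilesCount();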
Use of org.apache.iceberg.spark.source.StagedSparkTable in the apache/iceberg project.
Class BaseMigrateTableSparkAction, method doExecute.
private MigrateTable.Result doExecute() {
  LOG.info("Starting the migration of {} to Iceberg", sourceTableIdent());

  // move the source table to a new name, halting all modifications and allowing us to stage
  // the creation of a new Iceberg table in its place
  renameAndBackupSourceTable();

  StagedSparkTable stagedTable = null;
  Table icebergTable;
  boolean threw = true;
  try {
    LOG.info("Staging a new Iceberg table {}", destTableIdent());
    stagedTable = stageDestTable();
    icebergTable = stagedTable.table();

    LOG.info("Ensuring {} has a valid name mapping", destTableIdent());
    ensureNameMappingPresent(icebergTable);

    Some<String> backupNamespace = Some.apply(backupIdent.namespace()[0]);
    TableIdentifier v1BackupIdent = new TableIdentifier(backupIdent.name(), backupNamespace);
    String stagingLocation = getMetadataLocation(icebergTable);
    LOG.info("Generating Iceberg metadata for {} in {}", destTableIdent(), stagingLocation);
    SparkTableUtil.importSparkTable(spark(), v1BackupIdent, icebergTable, stagingLocation);

    LOG.info("Committing staged changes to {}", destTableIdent());
    stagedTable.commitStagedChanges();
    threw = false;
  } finally {
    if (threw) {
      LOG.error("Failed to perform the migration, aborting table creation and restoring the original table");
      restoreSourceTable();
      if (stagedTable != null) {
        try {
          stagedTable.abortStagedChanges();
        } catch (Exception abortException) {
          LOG.error("Cannot abort staged changes", abortException);
        }
      }
    }
  }

  Snapshot snapshot = icebergTable.currentSnapshot();
  long migratedDataFilesCount = Long.parseLong(snapshot.summary().get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
  LOG.info("Successfully loaded Iceberg metadata for {} files to {}", migratedDataFilesCount, destTableIdent());
  return new BaseMigrateTableActionResult(migratedDataFilesCount);
}
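Similarly, the migrate action is typically invoked through SparkActions; unlike snapshot, it converts the source table in place, so no destination identifier is given. A minimal sketch with a hypothetical table name, again assuming an active SparkSession named spark:

import org.apache.iceberg.actions.MigrateTable;
import org.apache.iceberg.spark.actions.SparkActions;

MigrateTable.Result result =
    SparkActions.get(spark)
        .migrateTable("spark_catalog.db.source_table")
        .execute();
long migrated = result.migratedDataFilesCount();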