use of org.apache.iceberg.PartitionSpec in project incubator-gobblin by apache.
the class IcebergMetadataWriter method createTable.
/**
 * Creates the Iceberg table that corresponds to the Hive table described by {@code spec}.
 *
 * <p>The Iceberg schema and partition spec are derived from the GMCE table schema together with the
 * Hive table. When {@code useDataLocationAsTableLocation} is set, the table location is built from
 * the dataset's native name plus {@code TABLE_LOCATION_SUFFIX} (formatted with the database name) and
 * that directory is created with {@code permission} before the table is registered; otherwise a
 * {@code null} location is handed to the catalog.
 *
 * <p>If the catalog reports that the table already exists (for example because another process
 * created it concurrently), the existing table is loaded and returned rather than {@code null}, so
 * callers always receive a usable table. Any exception raised by that load propagates to the caller.
 *
 * @param gmce the change event supplying the table schema and the dataset identifier
 * @param spec the Hive spec whose table is turned into an Iceberg table
 * @return the newly created table, or the already existing one if creation lost a race
 * @throws IOException propagated from the helper calls made while preparing the table
 *         (presumably the file-system access for the table location)
 */
protected Table createTable(GobblinMetadataChangeEvent gmce, HiveSpec spec) throws IOException {
  String schema = gmce.getTableSchema();
  org.apache.hadoop.hive.metastore.api.Table table = HiveMetaStoreUtils.getTable(spec.getTable());
  IcebergUtils.IcebergDataAndPartitionSchema schemas = IcebergUtils.getIcebergSchema(schema, table);
  TableIdentifier tid = TableIdentifier.of(table.getDbName(), table.getTableName());
  Schema tableSchema = schemas.tableSchema;
  Preconditions.checkState(tableSchema != null, "Table schema cannot be null when creating a table");
  PartitionSpec partitionSpec = IcebergUtils.getPartitionSpec(tableSchema, schemas.partitionSchema);

  // A null location is passed to the catalog unless the data location is used as table location.
  String tableLocation = null;
  if (useDataLocationAsTableLocation) {
    tableLocation = gmce.getDatasetIdentifier().getNativeName() + String.format(TABLE_LOCATION_SUFFIX, table.getDbName());
    // Set the path permission
    Path tablePath = new Path(tableLocation);
    WriterUtils.mkdirsWithRecursivePermission(tablePath.getFileSystem(conf), tablePath, permission);
  }

  // The timer context is only held for its close(); it is never read.
  try (Timer.Context ignored = metricContext.timer(CREATE_TABLE_TIME).time()) {
    Table icebergTable = catalog.createTable(tid, tableSchema, partitionSpec, tableLocation, IcebergUtils.getTableProperties(table));
    log.info("Created table {}, schema: {} partition spec: {}", tid, tableSchema, partitionSpec);
    return icebergTable;
  } catch (AlreadyExistsException e) {
    log.warn("table {} already exist, there may be some other process try to create table concurrently", tid);
    // Losing the creation race is not an error: hand back the table that now exists instead of null.
    return catalog.loadTable(tid);
  }
}
use of org.apache.iceberg.PartitionSpec in project incubator-gobblin by apache.
the class IcebergMetadataWriter method dropFiles.
/**
 * Handles the two kinds of removal triggered by a GMCE:
 * <ul>
 *   <li>regular data-file deletions, which are only staged on the table metadata's
 *       {@link DeleteFiles} here (aggregated, not committed in this method);</li>
 *   <li>expiration of older snapshots, which is submitted to the parallel runner and committed
 *       by that task.</li>
 * </ul>
 *
 * @param gmce the change event describing the files to drop
 * @param oldSpecsMap Hive specs used to resolve the data files to be deleted
 * @param table the Iceberg table being updated
 * @param tableMetadata holder of the pending {@link DeleteFiles} for the table
 * @param tid identifier of the table, used for logging and as the name of the submitted task
 * @throws IOException propagated from the helper calls made while resolving the files to delete
 */
protected void dropFiles(GobblinMetadataChangeEvent gmce, Map<String, Collection<HiveSpec>> oldSpecsMap, Table table, TableMetadata tableMetadata, TableIdentifier tid) throws IOException {
  PartitionSpec spec = table.spec();

  // Regular file deletion: stage every resolved data file on the pending DeleteFiles.
  DeleteFiles pendingDeletes = tableMetadata.getOrInitDeleteFiles();
  for (DataFile staleFile : getIcebergDataFilesToBeDeleted(gmce, table, new HashMap<>(), oldSpecsMap, spec)) {
    pendingDeletes.deleteFile(staleFile);
  }

  // Physical deletion during expiration is restricted to files whose path starts with the table location.
  Consumer<String> deleteIfUnderTableLocation = filePath -> {
    if (filePath.startsWith(table.location())) {
      table.io().deleteFile(filePath);
    }
  };

  // Snapshot expiration: drop snapshots beyond the look-back allowance for time-travel and
  // commit right away. Failures are logged and swallowed so the task always completes.
  Callable<Void> expireTask = () -> {
    try {
      long olderThan = getExpireSnapshotTime();
      long start = System.currentTimeMillis();
      table.expireSnapshots()
          .deleteWith(deleteIfUnderTableLocation)
          .expireOlderThan(olderThan)
          .commit();
      // TODO: emit these metrics to Ingraphs, in addition to metrics for publishing new snapshots and other Iceberg metadata operations.
      log.info("Spent {} ms to expire snapshots older than {} ({}) in table {}", System.currentTimeMillis() - start, new DateTime(olderThan).toString(), olderThan, tid.toString());
    } catch (Exception e) {
      log.error(String.format("Fail to expire snapshots for table %s due to exception ", tid.toString()), e);
    }
    return null;
  };
  parallelRunner.submitCallable(expireTask, tid.toString());
}
use of org.apache.iceberg.PartitionSpec in project incubator-gobblin by apache.
the class IcebergMetadataWriter method addLatePartitionValueToIcebergTable.
/**
 * Builds the Iceberg partition value of a file, extended with the "late" partition column.
 * <ol>
 *   <li>Adds the "late" partition column to the Iceberg table if it does not exist yet.</li>
 *   <li>Computes the "late" value from {@code datepartition} and the previous completion watermark.</li>
 *   <li>Falls back to {@code late=0} when the completion watermark check is disabled.</li>
 * </ol>
 *
 * @param table the Iceberg table to which the "late" partition column is added
 * @param tableMetadata source of the completeness flag and the previous completeness watermark
 * @param hivePartition the Hive partition whose values form the leading partition values
 * @param datepartition the date partition used to decide lateness
 * @return new iceberg partition value for file
 */
private StructLike addLatePartitionValueToIcebergTable(Table table, TableMetadata tableMetadata, HivePartition hivePartition, String datepartition) {
  Table tableWithLateColumn = addPartitionToIcebergTable(table, newPartitionColumn, newPartitionColumnType);
  PartitionSpec updatedSpec = tableWithLateColumn.spec();

  long previousWatermark = tableMetadata.prevCompletenessWatermark;
  int lateValue;
  if (tableMetadata.completenessEnabled) {
    lateValue = isLate(datepartition, previousWatermark);
  } else {
    lateValue = 0;
  }

  // The "late" value is appended after the existing Hive partition values.
  List<String> valuesWithLate = new ArrayList<>(hivePartition.getValues());
  valuesWithLate.add(String.valueOf(lateValue));
  return IcebergUtils.getPartition(updatedSpec.partitionType(), valuesWithLate);
}
Aggregations