Use of org.apache.spark.sql.connector.catalog.Table in the Apache Iceberg project.
From the class Spark3Util, method getPartitions.
/**
 * Uses Spark's file index to list all partitions found under a table's root path.
 *
 * <p>Spark infers the partition layout from the directory structure; each discovered
 * partition's values are converted to strings for the returned {@code SparkPartition}s.
 *
 * @param spark a Spark session
 * @param rootPath the root path of the table on the file system (not a catalog identifier)
 * @param format the file format of the table's data files (e.g. "parquet", "avro")
 * @return all of the table's partitions, with partition values rendered as strings
 */
public static List<SparkPartition> getPartitions(SparkSession spark, Path rootPath, String format) {
FileStatusCache fileStatusCache = FileStatusCache.getOrCreate(spark);
Map<String, String> emptyMap = Collections.emptyMap();
// Build an in-memory file index over the root path with no user-specified partition
// schema and no options; partition discovery is driven entirely by the directory layout.
InMemoryFileIndex fileIndex = new InMemoryFileIndex(spark, JavaConverters.collectionAsScalaIterableConverter(ImmutableList.of(rootPath)).asScala().toSeq(), JavaConverters.mapAsScalaMapConverter(emptyMap).asScala().toMap(Predef.conforms()), Option.empty(), fileStatusCache, Option.empty(), Option.empty());
org.apache.spark.sql.execution.datasources.PartitionSpec spec = fileIndex.partitionSpec();
StructType schema = spec.partitionColumns();
return JavaConverters.seqAsJavaListConverter(spec.partitions()).asJava().stream().map(partition -> {
Map<String, String> values = Maps.newHashMap();
JavaConverters.asJavaIterableConverter(schema).asJava().forEach(field -> {
int fieldIndex = schema.fieldIndex(field.name());
// Read the Catalyst (internal) representation of the partition value, then convert it
// back to an external Java/Scala value before rendering it as a string.
Object catalystValue = partition.values().get(fieldIndex, field.dataType());
Object value = CatalystTypeConverters.convertToScala(catalystValue, field.dataType());
values.put(field.name(), String.valueOf(value));
});
return new SparkPartition(values, partition.path().toString(), format);
}).collect(Collectors.toList());
}
Use of org.apache.spark.sql.connector.catalog.Table in the Apache Iceberg project.
From the class SparkSessionCatalog, method stageCreateOrReplace.
/**
 * Stages a create-or-replace of the given table.
 *
 * <p>Iceberg-provided tables are delegated to the staging-capable Iceberg catalog when one is
 * available; otherwise the table is dropped and re-created through the chosen catalog, wrapped
 * in a {@link RollbackStagedTable} that deletes the new table if the operation is aborted.
 *
 * @param ident the table identifier
 * @param schema the table schema
 * @param partitions the partition transforms
 * @param properties table properties (the "provider" entry selects the catalog)
 * @return a staged table whose abort path rolls back the creation
 * @throws NoSuchNamespaceException if the target namespace does not exist
 */
@Override
public StagedTable stageCreateOrReplace(Identifier ident, StructType schema, Transform[] partitions, Map<String, String> properties) throws NoSuchNamespaceException {
  final TableCatalog delegate;
  if (useIceberg(properties.get("provider"))) {
    if (asStagingCatalog != null) {
      // the Iceberg catalog supports staging natively; hand the whole operation over
      return asStagingCatalog.stageCreateOrReplace(ident, schema, partitions, properties);
    }
    delegate = icebergCatalog;
  } else {
    delegate = getSessionCatalog();
  }

  // remove any existing table first so the create below starts from a clean slate
  delegate.dropTable(ident);

  try {
    // create through the delegate, wrapping the result so an abort deletes the new table
    Table created = delegate.createTable(ident, schema, partitions, properties);
    return new RollbackStagedTable(delegate, ident, created);
  } catch (TableAlreadyExistsException e) {
    // a concurrent writer re-created the table between the drop and the create; retry
    return stageCreateOrReplace(ident, schema, partitions, properties);
  }
}
Use of org.apache.spark.sql.connector.catalog.Table in the Apache Iceberg project.
From the class SparkSessionCatalog, method stageCreate.
/**
 * Stages the creation of a new table.
 *
 * <p>Iceberg-provided tables are delegated to the staging-capable Iceberg catalog when one is
 * available; otherwise the table is created through the chosen catalog and wrapped in a
 * {@link RollbackStagedTable} that deletes it again if the operation is aborted.
 *
 * @param ident the table identifier
 * @param schema the table schema
 * @param partitions the partition transforms
 * @param properties table properties (the "provider" entry selects the catalog)
 * @return a staged table whose abort path rolls back the creation
 * @throws TableAlreadyExistsException if a table with the identifier already exists
 * @throws NoSuchNamespaceException if the target namespace does not exist
 */
@Override
public StagedTable stageCreate(Identifier ident, StructType schema, Transform[] partitions, Map<String, String> properties) throws TableAlreadyExistsException, NoSuchNamespaceException {
  final TableCatalog delegate;
  if (useIceberg(properties.get("provider"))) {
    if (asStagingCatalog != null) {
      // the Iceberg catalog supports staging natively; hand the whole operation over
      return asStagingCatalog.stageCreate(ident, schema, partitions, properties);
    }
    delegate = icebergCatalog;
  } else {
    delegate = getSessionCatalog();
  }

  // create through the delegate, wrapping the result so an abort deletes the new table
  Table created = delegate.createTable(ident, schema, partitions, properties);
  return new RollbackStagedTable(delegate, ident, created);
}
Use of org.apache.spark.sql.connector.catalog.Table in the Apache Iceberg project.
From the class BaseProcedure, method loadSparkTable.
/**
 * Loads the table for the given identifier from this procedure's catalog, validating that the
 * loaded table is a {@link SparkTable}.
 *
 * @param ident the identifier of the table to load
 * @return the loaded table, cast to {@link SparkTable}
 * @throws RuntimeException if the catalog has no table for the identifier
 */
protected SparkTable loadSparkTable(Identifier ident) {
  try {
    Table loaded = tableCatalog.loadTable(ident);
    // procedures only operate on Iceberg-backed Spark tables
    ValidationException.check(loaded instanceof SparkTable, "%s is not %s", ident, SparkTable.class.getName());
    return (SparkTable) loaded;
  } catch (NoSuchTableException e) {
    throw new RuntimeException(
        String.format("Couldn't load table '%s' in catalog '%s'", ident, tableCatalog.name()), e);
  }
}
Use of org.apache.spark.sql.connector.catalog.Table in the OpenLineage project.
From the class DeltaHandler, method getDatasetVersion.
/**
 * Returns the current snapshot version of a Delta table, if the catalog resolves the identifier
 * to a {@code DeltaTableV2}.
 *
 * @param tableCatalog the catalog, expected to be a {@code DeltaCatalog}
 * @param identifier the table identifier to resolve
 * @param properties table properties (unused here)
 * @return the snapshot version as a string, or empty when the table is not a Delta v2 table
 */
@SneakyThrows
public Optional<String> getDatasetVersion(TableCatalog tableCatalog, Identifier identifier, Map<String, String> properties) {
  Table resolved = ((DeltaCatalog) tableCatalog).loadTable(identifier);
  // only DeltaTableV2 exposes a snapshot with a version; anything else yields empty
  return Optional.ofNullable(resolved)
      .filter(DeltaTableV2.class::isInstance)
      .map(DeltaTableV2.class::cast)
      .map(deltaTable -> Long.toString(deltaTable.snapshot().version()));
}
Aggregations