Use of org.apache.spark.sql.connector.catalog.Identifier in project iceberg by apache.
The class Spark3Util, method getPartitions.
/**
* Use Spark to list all partitions in the table.
*
* @param spark a Spark session
* @param rootPath the table's root path
* @param format format of the file
* @return all of the table's partitions
*/
public static List<SparkPartition> getPartitions(SparkSession spark, Path rootPath, String format) {
  FileStatusCache fileStatusCache = FileStatusCache.getOrCreate(spark);
  Map<String, String> emptyMap = Collections.emptyMap();

  InMemoryFileIndex fileIndex = new InMemoryFileIndex(
      spark,
      JavaConverters.collectionAsScalaIterableConverter(ImmutableList.of(rootPath)).asScala().toSeq(),
      JavaConverters.mapAsScalaMapConverter(emptyMap).asScala().toMap(Predef.conforms()),
      Option.empty(),
      fileStatusCache,
      Option.empty(),
      Option.empty());

  org.apache.spark.sql.execution.datasources.PartitionSpec spec = fileIndex.partitionSpec();
  StructType schema = spec.partitionColumns();

  return JavaConverters.seqAsJavaListConverter(spec.partitions()).asJava().stream()
      .map(partition -> {
        Map<String, String> values = Maps.newHashMap();
        JavaConverters.asJavaIterableConverter(schema).asJava().forEach(field -> {
          int fieldIndex = schema.fieldIndex(field.name());
          Object catalystValue = partition.values().get(fieldIndex, field.dataType());
          Object value = CatalystTypeConverters.convertToScala(catalystValue, field.dataType());
          values.put(field.name(), String.valueOf(value));
        });
        return new SparkPartition(values, partition.path().toString(), format);
      })
      .collect(Collectors.toList());
}
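A minimal sketch of calling this helper, assuming a Parquet table rooted at a hypothetical warehouse path (the path and format values are illustrative, not from the Iceberg source):

SparkSession spark = SparkSession.active();
Path tableRoot = new Path("/warehouse/db/events"); // hypothetical table location
List<SparkPartition> partitions = Spark3Util.getPartitions(spark, tableRoot, "parquet");
// each SparkPartition pairs the discovered partition values with the partition directory and file format
partitions.forEach(partition -> System.out.println(partition));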
Use of org.apache.spark.sql.connector.catalog.Identifier in project iceberg by apache.
The class Spark3Util, method catalogAndIdentifier.
/**
* A modified version of Spark's LookupCatalog.CatalogAndIdentifier.unapply.
* Attempts to find the catalog and identifier that a multipart identifier represents.
* @param spark Spark session to use for resolution
* @param nameParts Multipart identifier representing a table
* @param defaultCatalog Catalog to use if none is specified
* @return The CatalogPlugin and Identifier for the table
*/
public static CatalogAndIdentifier catalogAndIdentifier(SparkSession spark, List<String> nameParts,
                                                        CatalogPlugin defaultCatalog) {
  CatalogManager catalogManager = spark.sessionState().catalogManager();

  String[] currentNamespace;
  if (defaultCatalog.equals(catalogManager.currentCatalog())) {
    currentNamespace = catalogManager.currentNamespace();
  } else {
    currentNamespace = defaultCatalog.defaultNamespace();
  }

  Pair<CatalogPlugin, Identifier> catalogIdentifier = SparkUtil.catalogAndIdentifier(nameParts,
      catalogName -> {
        try {
          return catalogManager.catalog(catalogName);
        } catch (Exception e) {
          return null;
        }
      },
      Identifier::of,
      defaultCatalog,
      currentNamespace);

  return new CatalogAndIdentifier(catalogIdentifier);
}
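A short usage sketch, resolving a dotted table name against the session's current catalog (the name parts "db" and "events" are illustrative; the catalog() and identifier() accessors are the ones used by the callers shown below):

SparkSession spark = SparkSession.active();
CatalogPlugin sessionDefault = spark.sessionState().catalogManager().currentCatalog();
// "db" and "events" are placeholder name parts for a table named db.events
Spark3Util.CatalogAndIdentifier resolved =
    Spark3Util.catalogAndIdentifier(spark, Arrays.asList("db", "events"), sessionDefault);
CatalogPlugin catalog = resolved.catalog();
Identifier ident = resolved.identifier();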
Use of org.apache.spark.sql.connector.catalog.Identifier in project iceberg by apache.
The class RollbackToSnapshotProcedure, method call.
@Override
public InternalRow[] call(InternalRow args) {
  Identifier tableIdent = toIdentifier(args.getString(0), PARAMETERS[0].name());
  long snapshotId = args.getLong(1);

  return modifyIcebergTable(tableIdent, table -> {
    Snapshot previousSnapshot = table.currentSnapshot();

    table.manageSnapshots().rollbackTo(snapshotId).commit();

    InternalRow outputRow = newInternalRow(previousSnapshot.snapshotId(), snapshotId);
    return new InternalRow[] { outputRow };
  });
}
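Assuming the Iceberg SQL extensions are configured in the session, this procedure is normally reached through Spark SQL's CALL syntax; a sketch with placeholder catalog, table, and snapshot id:

SparkSession spark = SparkSession.active();
// "my_catalog" and "db.events" are placeholders; the snapshot id must exist in the table's history
spark.sql("CALL my_catalog.system.rollback_to_snapshot('db.events', 5781947118336215154)");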
Use of org.apache.spark.sql.connector.catalog.Identifier in project iceberg by apache.
The class IcebergSource, method getTable.
@Override
public Table getTable(StructType schema, Transform[] partitioning, Map<String, String> options) {
  Spark3Util.CatalogAndIdentifier catalogIdentifier = catalogAndIdentifier(new CaseInsensitiveStringMap(options));
  CatalogPlugin catalog = catalogIdentifier.catalog();
  Identifier ident = catalogIdentifier.identifier();

  try {
    if (catalog instanceof TableCatalog) {
      return ((TableCatalog) catalog).loadTable(ident);
    }
  } catch (NoSuchTableException e) {
    // throwing an Iceberg NoSuchTableException because the Spark one is checked and can't be thrown from this interface
    throw new org.apache.iceberg.exceptions.NoSuchTableException(e, "Cannot find table for %s.", ident);
  }

  // throwing an Iceberg NoSuchTableException because the Spark one is checked and can't be thrown from this interface
  throw new org.apache.iceberg.exceptions.NoSuchTableException("Cannot find table for %s.", ident);
}
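getTable is reached when Spark plans a read or write against the iceberg source; a minimal read sketch with a placeholder identifier:

SparkSession spark = SparkSession.active();
// "db.events" is a placeholder; a filesystem path would also be accepted (see catalogAndIdentifier below)
Dataset<Row> df = spark.read().format("iceberg").load("db.events");
df.show();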
Use of org.apache.spark.sql.connector.catalog.Identifier in project iceberg by apache.
The class IcebergSource, method catalogAndIdentifier.
private Spark3Util.CatalogAndIdentifier catalogAndIdentifier(CaseInsensitiveStringMap options) {
  Preconditions.checkArgument(options.containsKey("path"), "Cannot open table: path is not set");
  SparkSession spark = SparkSession.active();
  setupDefaultSparkCatalog(spark);
  String path = options.get("path");

  Long snapshotId = propertyAsLong(options, SparkReadOptions.SNAPSHOT_ID);
  Long asOfTimestamp = propertyAsLong(options, SparkReadOptions.AS_OF_TIMESTAMP);
  Preconditions.checkArgument(asOfTimestamp == null || snapshotId == null,
      "Cannot specify both snapshot-id (%s) and as-of-timestamp (%s)", snapshotId, asOfTimestamp);

  String selector = null;
  if (snapshotId != null) {
    selector = SNAPSHOT_ID + snapshotId;
  }
  if (asOfTimestamp != null) {
    selector = AT_TIMESTAMP + asOfTimestamp;
  }

  CatalogManager catalogManager = spark.sessionState().catalogManager();

  if (path.contains("/")) {
    // contains a path: return the Iceberg default catalog and a PathIdentifier
    String newPath = (selector == null) ? path : path + "#" + selector;
    return new Spark3Util.CatalogAndIdentifier(catalogManager.catalog(DEFAULT_CATALOG_NAME), new PathIdentifier(newPath));
  }

  final Spark3Util.CatalogAndIdentifier catalogAndIdentifier =
      Spark3Util.catalogAndIdentifier("path or identifier", spark, path);

  Identifier ident = identifierWithSelector(catalogAndIdentifier.identifier(), selector);
  if (catalogAndIdentifier.catalog().name().equals("spark_catalog") &&
      !(catalogAndIdentifier.catalog() instanceof SparkSessionCatalog)) {
    // the catalog is the session catalog but does not support Iceberg; use the Iceberg default catalog instead
    return new Spark3Util.CatalogAndIdentifier(catalogManager.catalog(DEFAULT_CATALOG_NAME), ident);
  } else {
    return new Spark3Util.CatalogAndIdentifier(catalogAndIdentifier.catalog(), ident);
  }
}
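The selector above is driven by read options; a hedged sketch of passing a time-travel option through the DataFrame reader (the snapshot id and path are illustrative, and only one of the two options may be set):

SparkSession spark = SparkSession.active();
Dataset<Row> df = spark.read()
    .format("iceberg")
    .option(SparkReadOptions.SNAPSHOT_ID, 5781947118336215154L) // or SparkReadOptions.AS_OF_TIMESTAMP, not both
    .load("/warehouse/db/events"); // contains "/", so a PathIdentifier with a "#" selector is used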