Use of org.apache.iceberg.spark.SparkSessionCatalog in project iceberg by apache.
The class IcebergSource, method catalogAndIdentifier.
private Spark3Util.CatalogAndIdentifier catalogAndIdentifier(CaseInsensitiveStringMap options) {
  Preconditions.checkArgument(options.containsKey("path"), "Cannot open table: path is not set");
  SparkSession spark = SparkSession.active();
  setupDefaultSparkCatalog(spark);
  String path = options.get("path");

  Long snapshotId = propertyAsLong(options, SparkReadOptions.SNAPSHOT_ID);
  Long asOfTimestamp = propertyAsLong(options, SparkReadOptions.AS_OF_TIMESTAMP);
  Preconditions.checkArgument(asOfTimestamp == null || snapshotId == null,
      "Cannot specify both snapshot-id (%s) and as-of-timestamp (%s)", snapshotId, asOfTimestamp);

  String selector = null;
  if (snapshotId != null) {
    selector = SNAPSHOT_ID + snapshotId;
  }
  if (asOfTimestamp != null) {
    selector = AT_TIMESTAMP + asOfTimestamp;
  }

  CatalogManager catalogManager = spark.sessionState().catalogManager();
  if (path.contains("/")) {
    // contains a path. Return iceberg default catalog and a PathIdentifier
    String newPath = (selector == null) ? path : path + "#" + selector;
    return new Spark3Util.CatalogAndIdentifier(
        catalogManager.catalog(DEFAULT_CATALOG_NAME), new PathIdentifier(newPath));
  }

  final Spark3Util.CatalogAndIdentifier catalogAndIdentifier =
      Spark3Util.catalogAndIdentifier("path or identifier", spark, path);
  Identifier ident = identifierWithSelector(catalogAndIdentifier.identifier(), selector);

  if (catalogAndIdentifier.catalog().name().equals("spark_catalog")
      && !(catalogAndIdentifier.catalog() instanceof SparkSessionCatalog)) {
    // catalog is a session catalog but does not support Iceberg. Use Iceberg instead.
    return new Spark3Util.CatalogAndIdentifier(catalogManager.catalog(DEFAULT_CATALOG_NAME), ident);
  } else {
    return new Spark3Util.CatalogAndIdentifier(catalogAndIdentifier.catalog(), ident);
  }
}
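A minimal sketch of how a reader exercises both branches of this resolution, assuming an active SparkSession named spark and the usual Spark SQL imports; the snapshot id, path, and table name below are placeholders, not values from the source:

  // Path-based load: the target contains "/", so it resolves to the default
  // Iceberg catalog and a PathIdentifier; the snapshot selector travels as a
  // "#snapshot_id_<id>" suffix on the path.
  Dataset<Row> byPath = spark.read()
      .format("iceberg")
      .option(SparkReadOptions.SNAPSHOT_ID, 10963874102873L)
      .load("/warehouse/db/events");

  // Name-based load: no "/", so resolution goes through Spark3Util.catalogAndIdentifier
  // and may be rerouted to the Iceberg default catalog when spark_catalog is a
  // session catalog that is not a SparkSessionCatalog.
  Dataset<Row> byName = spark.read().format("iceberg").load("db.events");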
Use of org.apache.iceberg.spark.SparkSessionCatalog in project iceberg by apache.
The class TestRemoveOrphanFilesAction3, method testSparkSessionCatalogHiveTable.
@Test
public void testSparkSessionCatalogHiveTable() throws Exception {
  spark.conf().set("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog");
  spark.conf().set("spark.sql.catalog.spark_catalog.type", "hive");
  SparkSessionCatalog cat = (SparkSessionCatalog) spark.sessionState().catalogManager().v2SessionCatalog();

  String[] database = { "default" };
  Identifier id = Identifier.of(database, "sessioncattest");
  Map<String, String> options = Maps.newHashMap();
  Transform[] transforms = {};
  cat.dropTable(id);
  cat.createTable(id, SparkSchemaUtil.convert(SCHEMA), transforms, options);

  SparkTable table = (SparkTable) cat.loadTable(id);
  spark.sql("INSERT INTO default.sessioncattest VALUES (1,1,1)");

  String location = table.table().location().replaceFirst("file:", "");
  new File(location + "/data/trashfile").createNewFile();

  DeleteOrphanFiles.Result results = SparkActions.get()
      .deleteOrphanFiles(table.table())
      .olderThan(System.currentTimeMillis() + 1000)
      .execute();
  Assert.assertTrue("trash file should be removed",
      StreamSupport.stream(results.orphanFileLocations().spliterator(), false)
          .anyMatch(file -> file.contains("file:" + location + "/data/trashfile")));
}
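Note that the olderThan cutoff is set one second in the future so the freshly created trashfile passes the age check that normally protects recently written files. A minimal dry-run sketch using the same DeleteOrphanFiles API, assuming a Table handle named table and the usual imports; deleteWith swaps the actual delete call for a collector, so execute() reports orphan candidates without removing anything:

  List<String> wouldDelete = Lists.newArrayList();
  DeleteOrphanFiles.Result result = SparkActions.get()
      .deleteOrphanFiles(table)
      .olderThan(System.currentTimeMillis() - TimeUnit.DAYS.toMillis(3))  // conservative 3-day cutoff
      .deleteWith(wouldDelete::add)                                       // record instead of deleting
      .execute();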
Use of org.apache.iceberg.spark.SparkSessionCatalog in project iceberg by apache.
The class TestRemoveOrphanFilesAction3, method testSparkSessionCatalogHadoopTable.
@Test
public void testSparkSessionCatalogHadoopTable() throws Exception {
  spark.conf().set("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog");
  spark.conf().set("spark.sql.catalog.spark_catalog.type", "hadoop");
  spark.conf().set("spark.sql.catalog.spark_catalog.warehouse", tableLocation);
  SparkSessionCatalog cat = (SparkSessionCatalog) spark.sessionState().catalogManager().v2SessionCatalog();

  String[] database = { "default" };
  Identifier id = Identifier.of(database, "table");
  Map<String, String> options = Maps.newHashMap();
  Transform[] transforms = {};
  cat.createTable(id, SparkSchemaUtil.convert(SCHEMA), transforms, options);

  SparkTable table = (SparkTable) cat.loadTable(id);
  spark.sql("INSERT INTO default.table VALUES (1,1,1)");

  String location = table.table().location().replaceFirst("file:", "");
  new File(location + "/data/trashfile").createNewFile();

  DeleteOrphanFiles.Result results = SparkActions.get()
      .deleteOrphanFiles(table.table())
      .olderThan(System.currentTimeMillis() + 1000)
      .execute();
  Assert.assertTrue("trash file should be removed",
      StreamSupport.stream(results.orphanFileLocations().spliterator(), false)
          .anyMatch(file -> file.contains("file:" + location + "/data/trashfile")));
}
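Both tests inject the catalog configuration into a live session via spark.conf().set(...). A sketch of the equivalent configuration applied at session construction, which is how it would normally be set outside a test; the master and warehouse path are placeholders:

  SparkSession spark = SparkSession.builder()
      .master("local[2]")
      .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
      .config("spark.sql.catalog.spark_catalog.type", "hadoop")
      .config("spark.sql.catalog.spark_catalog.warehouse", "/tmp/iceberg-warehouse")
      .getOrCreate();

Setting these properties on an already-running session, as the tests do, appears to work because Spark instantiates v2 catalogs lazily on first access; that timing detail is an assumption worth verifying against your Spark version.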