Usage of org.apache.iceberg.spark.source.SparkTable in the Apache Iceberg project:
class TestRemoveOrphanFilesAction3, method testSparkCatalogNamedHiveTable.
@Test
public void testSparkCatalogNamedHiveTable() throws Exception {
  // Register a Hadoop-type Iceberg catalog under the name "hive".
  spark.conf().set("spark.sql.catalog.hive", "org.apache.iceberg.spark.SparkCatalog");
  spark.conf().set("spark.sql.catalog.hive.type", "hadoop");
  spark.conf().set("spark.sql.catalog.hive.warehouse", tableLocation);

  SparkCatalog catalog = (SparkCatalog) spark.sessionState().catalogManager().catalog("hive");
  Identifier identifier = Identifier.of(new String[] { "default" }, "table");
  Map<String, String> properties = Maps.newHashMap();
  catalog.createTable(identifier, SparkSchemaUtil.convert(SCHEMA), new Transform[0], properties);

  SparkTable table = catalog.loadTable(identifier);
  spark.sql("INSERT INTO hive.default.table VALUES (1,1,1)");

  // Drop an unreferenced file into the table's data directory so the
  // orphan-file cleanup has something to find.
  String location = table.table().location().replaceFirst("file:", "");
  new File(location + "/data/trashfile").createNewFile();

  DeleteOrphanFiles.Result results =
      SparkActions.get()
          .deleteOrphanFiles(table.table())
          .olderThan(System.currentTimeMillis() + 1000)
          .execute();

  Assert.assertTrue(
      "trash file should be removed",
      StreamSupport.stream(results.orphanFileLocations().spliterator(), false)
          .anyMatch(file -> file.contains("file:" + location + "/data/trashfile")));
}
Usage of org.apache.iceberg.spark.source.SparkTable in the Apache Iceberg project:
class TestRemoveOrphanFilesAction3, method testSparkSessionCatalogHiveTable.
@Test
public void testSparkSessionCatalogHiveTable() throws Exception {
  // Route the built-in session catalog through Iceberg's SparkSessionCatalog,
  // backed by a Hive metastore.
  spark.conf().set("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog");
  spark.conf().set("spark.sql.catalog.spark_catalog.type", "hive");

  SparkSessionCatalog catalog =
      (SparkSessionCatalog) spark.sessionState().catalogManager().v2SessionCatalog();
  Identifier identifier = Identifier.of(new String[] { "default" }, "sessioncattest");
  Map<String, String> properties = Maps.newHashMap();

  // Drop any leftover table from a previous run, then create a fresh one.
  catalog.dropTable(identifier);
  catalog.createTable(identifier, SparkSchemaUtil.convert(SCHEMA), new Transform[0], properties);

  SparkTable table = (SparkTable) catalog.loadTable(identifier);
  spark.sql("INSERT INTO default.sessioncattest VALUES (1,1,1)");

  // Drop an unreferenced file into the table's data directory so the
  // orphan-file cleanup has something to find.
  String location = table.table().location().replaceFirst("file:", "");
  new File(location + "/data/trashfile").createNewFile();

  DeleteOrphanFiles.Result results =
      SparkActions.get()
          .deleteOrphanFiles(table.table())
          .olderThan(System.currentTimeMillis() + 1000)
          .execute();

  Assert.assertTrue(
      "trash file should be removed",
      StreamSupport.stream(results.orphanFileLocations().spliterator(), false)
          .anyMatch(file -> file.contains("file:" + location + "/data/trashfile")));
}
Usage of org.apache.iceberg.spark.source.SparkTable in the OpenLineage project:
class IcebergHandler, method getDatasetVersion.
/**
 * Resolves the current snapshot id of an Iceberg table as its dataset version.
 *
 * @param tableCatalog Spark catalog expected to be an Iceberg {@code SparkCatalog}
 * @param identifier table identifier within the catalog
 * @param properties table properties (unused here, part of the handler contract)
 * @return the current snapshot id as a string, or empty when the table is
 *     missing or has no snapshot yet
 */
@SneakyThrows
public Optional<String> getDatasetVersion(TableCatalog tableCatalog, Identifier identifier, Map<String, String> properties) {
  SparkCatalog sparkCatalog = (SparkCatalog) tableCatalog;
  SparkTable sparkTable;
  try {
    sparkTable = sparkCatalog.loadTable(identifier);
  } catch (NoSuchTableException ex) {
    // A missing table simply means there is no version to report.
    return Optional.empty();
  }
  // Both the underlying table and its current snapshot may be absent.
  return Optional.ofNullable(sparkTable.table())
      .map(icebergTable -> icebergTable.currentSnapshot())
      .map(snapshot -> String.valueOf(snapshot.snapshotId()));
}
Usage of org.apache.iceberg.spark.source.SparkTable in the Apache Iceberg project:
class TestAlterTablePartitionFields, method sparkTable.
private SparkTable sparkTable() throws Exception {
  // Refresh the Iceberg metadata first so the loaded SparkTable reflects
  // any DDL performed earlier in the test.
  validationCatalog.loadTable(tableIdent).refresh();
  TableCatalog catalog =
      (TableCatalog) spark.sessionState().catalogManager().catalog(catalogName);
  Identifier identifier =
      Identifier.of(tableIdent.namespace().levels(), tableIdent.name());
  return (SparkTable) catalog.loadTable(identifier);
}
Usage of org.apache.iceberg.spark.source.SparkTable in the Apache Iceberg project:
class Spark3Util, method loadIcebergTable.
/**
 * Looks up an Iceberg {@link org.apache.iceberg.Table} by name through a Spark V2 catalog.
 *
 * <p>When caching is enabled in {@link SparkCatalog}, the returned table's
 * {@link TableOperations} may be stale; refresh the table to obtain the latest state.
 *
 * @param spark SparkSession used for looking up catalog references and tables
 * @param name the multipart identifier of the Iceberg table
 * @return an Iceberg table
 */
public static org.apache.iceberg.Table loadIcebergTable(SparkSession spark, String name) throws ParseException, NoSuchTableException {
  CatalogAndIdentifier catalogAndIdentifier = catalogAndIdentifier(spark, name);
  TableCatalog tableCatalog = asTableCatalog(catalogAndIdentifier.catalog);
  return toIcebergTable(tableCatalog.loadTable(catalogAndIdentifier.identifier));
}
Aggregations: the snippets above collect all indexed usages of this class.