Use of org.apache.iceberg.spark.SparkCatalog in project iceberg by apache: class TestRemoveOrphanFilesAction3, method testSparkCatalogNamedHiveTable.
@Test
public void testSparkCatalogNamedHiveTable() throws Exception {
  // The catalog is registered under the name "hive" but configured with the Hadoop catalog type.
  spark.conf().set("spark.sql.catalog.hive", "org.apache.iceberg.spark.SparkCatalog");
  spark.conf().set("spark.sql.catalog.hive.type", "hadoop");
  spark.conf().set("spark.sql.catalog.hive.warehouse", tableLocation);
  SparkCatalog cat = (SparkCatalog) spark.sessionState().catalogManager().catalog("hive");

  String[] database = { "default" };
  Identifier id = Identifier.of(database, "table");
  Map<String, String> options = Maps.newHashMap();
  Transform[] transforms = {};
  cat.createTable(id, SparkSchemaUtil.convert(SCHEMA), transforms, options);
  SparkTable table = cat.loadTable(id);

  spark.sql("INSERT INTO hive.default.table VALUES (1,1,1)");

  // Plant an unreferenced file in the table's data directory.
  String location = table.table().location().replaceFirst("file:", "");
  new File(location + "/data/trashfile").createNewFile();

  // olderThan is one second in the future, so even the just-created file qualifies as an orphan.
  DeleteOrphanFiles.Result results = SparkActions.get()
      .deleteOrphanFiles(table.table())
      .olderThan(System.currentTimeMillis() + 1000)
      .execute();
  Assert.assertTrue(
      "trash file should be removed",
      StreamSupport.stream(results.orphanFileLocations().spliterator(), false)
          .anyMatch(file -> file.contains("file:" + location + "/data/trashfile")));
}
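The future-dated olderThan timestamp is a test device that forces freshly written files into the deletion window. In production the cutoff would normally trail the current time; a minimal sketch under that assumption, where the table variable and the three-day retention window are illustrative, not from the source:

// A sketch, not the test above: 'table' is assumed to be an org.apache.iceberg.Table
// loaded elsewhere, and the three-day retention window is an illustrative choice.
long cutoff = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(3);
DeleteOrphanFiles.Result result = SparkActions.get()
    .deleteOrphanFiles(table)
    .olderThan(cutoff) // only files last modified before the cutoff are eligible
    .execute();
result.orphanFileLocations().forEach(loc -> System.out.println("removed orphan: " + loc));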
Use of org.apache.iceberg.spark.SparkCatalog in project OpenLineage by OpenLineage: class IcebergHandler, method getDatasetVersion.
@SneakyThrows
public Optional<String> getDatasetVersion(
    TableCatalog tableCatalog, Identifier identifier, Map<String, String> properties) {
  SparkCatalog sparkCatalog = (SparkCatalog) tableCatalog;
  SparkTable table;
  try {
    table = sparkCatalog.loadTable(identifier);
  } catch (NoSuchTableException ex) {
    return Optional.empty();
  }
  if (table.table() != null && table.table().currentSnapshot() != null) {
    // The dataset version is the current snapshot id of the underlying Iceberg table.
    return Optional.of(Long.toString(table.table().currentSnapshot().snapshotId()));
  }
  return Optional.empty();
}
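The NoSuchTableException branch is easy to exercise in isolation. A minimal Mockito sketch, assuming an icebergHandler fixture like the one in the IcebergHandlerTest example below; the identifier and the two-argument NoSuchTableException constructor are illustrative choices:

@Test
public void testGetDatasetVersionForMissingTable() throws Exception {
  SparkCatalog sparkCatalog = mock(SparkCatalog.class);
  Identifier identifier = Identifier.of(new String[] { "default" }, "missing");
  // loadTable declares NoSuchTableException, so the stub can throw it directly.
  when(sparkCatalog.loadTable(identifier)).thenThrow(new NoSuchTableException("default", "missing"));
  Optional<String> version = icebergHandler.getDatasetVersion(sparkCatalog, identifier, new HashMap<>());
  assertEquals(Optional.empty(), version);
}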
Use of org.apache.iceberg.spark.SparkCatalog in project OpenLineage by OpenLineage: class IcebergHandler, method getDatasetIdentifier.
@Override
public DatasetIdentifier getDatasetIdentifier(
    SparkSession session, TableCatalog tableCatalog, Identifier identifier, Map<String, String> properties) {
  SparkCatalog sparkCatalog = (SparkCatalog) tableCatalog;
  String catalogName = sparkCatalog.name();
  String prefix = String.format("spark.sql.catalog.%s", catalogName);
  Map<String, String> conf = ScalaConversionUtils.<String, String>fromMap(session.conf().getAll());
  log.info(conf.toString());
  Map<String, String> catalogConf = conf.entrySet().stream()
      .filter(x -> x.getKey().startsWith(prefix))
      .filter(x -> x.getKey().length() > prefix.length())
      .collect(Collectors.toMap(
          x -> x.getKey().substring(prefix.length() + 1), // handle dot after prefix
          Map.Entry::getValue));
  log.info(catalogConf.toString());
  if (catalogConf.isEmpty() || !catalogConf.containsKey("type")) {
    throw new UnsupportedCatalogException(catalogName);
  }
  log.info(catalogConf.get("type"));
  switch (catalogConf.get("type")) {
    case "hadoop":
      return getHadoopIdentifier(catalogConf, identifier.toString());
    case "hive":
      return getHiveIdentifier(session, catalogConf.get(CatalogProperties.URI), identifier.toString());
    default:
      throw new UnsupportedCatalogException(catalogConf.get("type"));
  }
}
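The two filters and the substring(prefix.length() + 1) do the real work: a key exactly equal to the prefix (the catalog implementation class itself) and unrelated keys are dropped, and the surviving keys lose the prefix plus its trailing dot. A standalone sketch of the same collector logic with illustrative keys:

Map<String, String> conf = new HashMap<>();
conf.put("spark.sql.catalog.test", "org.apache.iceberg.spark.SparkCatalog"); // equal to prefix: filtered out
conf.put("spark.sql.catalog.test.type", "hive");
conf.put("spark.sql.catalog.test.uri", "thrift://metastore-host:9083");      // illustrative URI
conf.put("spark.sql.shuffle.partitions", "200");                             // unrelated key: filtered out

String prefix = "spark.sql.catalog.test";
Map<String, String> catalogConf = conf.entrySet().stream()
    .filter(e -> e.getKey().startsWith(prefix))
    .filter(e -> e.getKey().length() > prefix.length())
    .collect(Collectors.toMap(
        e -> e.getKey().substring(prefix.length() + 1), // strip "spark.sql.catalog.test."
        Map.Entry::getValue));
// catalogConf is now {type=hive, uri=thrift://metastore-host:9083}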
Use of org.apache.iceberg.spark.SparkCatalog in project OpenLineage by OpenLineage: class IcebergHandlerTest, method testGetDatasetIdentifierForHive.
@Test
public void testGetDatasetIdentifierForHive() {
  when(sparkSession.conf()).thenReturn(runtimeConfig);
  // scala.collection.immutable.Map.Map2: a two-entry immutable Scala map.
  when(runtimeConfig.getAll()).thenReturn(new Map.Map2<>(
      "spark.sql.catalog.test.type", "hive",
      "spark.sql.catalog.test.uri", "thrift://metastore-host:10001"));
  SparkCatalog sparkCatalog = mock(SparkCatalog.class);
  when(sparkCatalog.name()).thenReturn("test");
  DatasetIdentifier datasetIdentifier = icebergHandler.getDatasetIdentifier(
      sparkSession,
      sparkCatalog,
      Identifier.of(new String[] { "database", "schema" }, "table"),
      new HashMap<>());
  assertEquals("database.schema.table", datasetIdentifier.getName());
  assertEquals("hive://metastore-host:10001", datasetIdentifier.getNamespace());
}
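A companion sketch for the hadoop branch of getDatasetIdentifier. The warehouse property and both expected values are assumptions about how getHadoopIdentifier derives the namespace, not verified against the project:

// A sketch: the warehouse path and the expected namespace below are assumptions.
@Test
public void testGetDatasetIdentifierForHadoop() {
  when(sparkSession.conf()).thenReturn(runtimeConfig);
  when(runtimeConfig.getAll()).thenReturn(new Map.Map2<>(
      "spark.sql.catalog.test.type", "hadoop",
      "spark.sql.catalog.test.warehouse", "/tmp/warehouse")); // hypothetical warehouse path
  SparkCatalog sparkCatalog = mock(SparkCatalog.class);
  when(sparkCatalog.name()).thenReturn("test");
  DatasetIdentifier datasetIdentifier = icebergHandler.getDatasetIdentifier(
      sparkSession,
      sparkCatalog,
      Identifier.of(new String[] { "database", "schema" }, "table"),
      new HashMap<>());
  assertEquals("database.schema.table", datasetIdentifier.getName());
  // Assumed: for hadoop-type catalogs the namespace reflects the warehouse location.
  assertEquals("/tmp/warehouse", datasetIdentifier.getNamespace());
}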
Use of org.apache.iceberg.spark.SparkCatalog in project iceberg by apache: class TestRemoveOrphanFilesAction3, method testSparkCatalogNamedHadoopTable.
@Test
public void testSparkCatalogNamedHadoopTable() throws Exception {
  spark.conf().set("spark.sql.catalog.hadoop", "org.apache.iceberg.spark.SparkCatalog");
  spark.conf().set("spark.sql.catalog.hadoop.type", "hadoop");
  spark.conf().set("spark.sql.catalog.hadoop.warehouse", tableLocation);
  SparkCatalog cat = (SparkCatalog) spark.sessionState().catalogManager().catalog("hadoop");

  String[] database = { "default" };
  Identifier id = Identifier.of(database, "table");
  Map<String, String> options = Maps.newHashMap();
  Transform[] transforms = {};
  cat.createTable(id, SparkSchemaUtil.convert(SCHEMA), transforms, options);
  SparkTable table = cat.loadTable(id);

  spark.sql("INSERT INTO hadoop.default.table VALUES (1,1,1)");

  String location = table.table().location().replaceFirst("file:", "");
  new File(location + "/data/trashfile").createNewFile();

  DeleteOrphanFiles.Result results = SparkActions.get()
      .deleteOrphanFiles(table.table())
      .olderThan(System.currentTimeMillis() + 1000)
      .execute();
  Assert.assertTrue(
      "trash file should be removed",
      StreamSupport.stream(results.orphanFileLocations().spliterator(), false)
          .anyMatch(file -> file.contains("file:" + location + "/data/trashfile")));
}