Use of org.apache.spark.sql.connector.catalog.Table in project OpenLineage by OpenLineage.
The class AlterTableDatasetBuilder, method apply.
@Override
public List<OpenLineage.OutputDataset> apply(AlterTable alterTable) {
  TableCatalog tableCatalog = alterTable.catalog();
  Table table;
  try {
    table = tableCatalog.loadTable(alterTable.ident());
  } catch (Exception e) {
    // If the table cannot be loaded, there is nothing to report.
    return Collections.emptyList();
  }
  Optional<DatasetIdentifier> di =
      PlanUtils3.getDatasetIdentifier(context, tableCatalog, alterTable.ident(), table.properties());
  if (!di.isPresent()) {
    return Collections.emptyList();
  }
  OpenLineage openLineage = context.getOpenLineage();
  OpenLineage.DatasetFacetsBuilder builder =
      openLineage
          .newDatasetFacetsBuilder()
          .schema(PlanUtils.schemaFacet(openLineage, table.schema()))
          .dataSource(PlanUtils.datasourceFacet(openLineage, di.get().getNamespace()));
  // Attach a dataset version facet when the catalog can provide one (e.g. Iceberg snapshots).
  CatalogUtils3.getDatasetVersion(tableCatalog, alterTable.ident(), table.properties())
      .ifPresent(version -> builder.version(openLineage.newDatasetVersionDatasetFacet(version)));
  return Collections.singletonList(
      outputDataset().getDataset(di.get().getName(), di.get().getNamespace(), builder.build()));
}
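The try/catch around loadTable is the key design choice here: if the table cannot be loaded (for example, because it was concurrently dropped), the builder reports no output datasets rather than failing the lineage listener and, with it, the running Spark job. A minimal sketch of that load-or-skip pattern, assuming only the public Spark TableCatalog API; safeLoadTable is a hypothetical helper name:

import java.util.Optional;
import org.apache.spark.sql.connector.catalog.Identifier;
import org.apache.spark.sql.connector.catalog.Table;
import org.apache.spark.sql.connector.catalog.TableCatalog;

class CatalogHelpers {
  // Hypothetical helper: load a table, treating any failure as "absent" so
  // that metadata collection never breaks the job it is observing.
  static Optional<Table> safeLoadTable(TableCatalog catalog, Identifier ident) {
    try {
      return Optional.of(catalog.loadTable(ident));
    } catch (Exception e) {
      return Optional.empty();
    }
  }
}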
Use of org.apache.spark.sql.connector.catalog.Table in project iceberg by apache.
The class Spark3Util, method loadIcebergTable.
/**
 * Returns an Iceberg Table by its name from a Spark V2 Catalog. If the cache is enabled in
 * {@link SparkCatalog}, the table's {@link TableOperations} may be stale; refresh the table to
 * get the latest state.
 *
 * @param spark SparkSession used for looking up catalog references and tables
 * @param name the multipart identifier of the Iceberg table
 * @return an Iceberg table
 */
public static org.apache.iceberg.Table loadIcebergTable(SparkSession spark, String name)
    throws ParseException, NoSuchTableException {
  CatalogAndIdentifier catalogAndIdentifier = catalogAndIdentifier(spark, name);
  TableCatalog catalog = asTableCatalog(catalogAndIdentifier.catalog);
  Table sparkTable = catalog.loadTable(catalogAndIdentifier.identifier);
  return toIcebergTable(sparkTable);
}
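A hypothetical usage sketch, assuming a running SparkSession named spark and an existing Iceberg table db.events (both names are illustrative); per the Javadoc above, refresh the table if the SparkCatalog cache may be stale:

org.apache.iceberg.Table table = Spark3Util.loadIcebergTable(spark, "db.events");
table.refresh(); // pick up the latest metadata in case the catalog cache is stale
System.out.println(table.schema());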
Use of org.apache.spark.sql.connector.catalog.Table in project iceberg by apache.
The class Spark3Util, method getPartitions.
/**
 * Use Spark to list all partitions in the table.
 *
 * @param spark a Spark session
 * @param rootPath the root path of the table
 * @param format the file format of the partitions (e.g. parquet)
 * @param partitionFilter a map of partition column names to values, used to filter partitions
 * @return all of the table's partitions that match the filter
 */
public static List<SparkPartition> getPartitions(
    SparkSession spark, Path rootPath, String format, Map<String, String> partitionFilter) {
  FileStatusCache fileStatusCache = FileStatusCache.getOrCreate(spark);
  InMemoryFileIndex fileIndex =
      new InMemoryFileIndex(
          spark,
          JavaConverters.collectionAsScalaIterableConverter(ImmutableList.of(rootPath))
              .asScala()
              .toSeq(),
          scala.collection.immutable.Map$.MODULE$.<String, String>empty(),
          Option.empty(),
          fileStatusCache,
          Option.empty(),
          Option.empty());
  org.apache.spark.sql.execution.datasources.PartitionSpec spec = fileIndex.partitionSpec();
  StructType schema = spec.partitionColumns();
  if (schema.isEmpty()) {
    // The table is unpartitioned; there are no partitions to list.
    return Lists.newArrayList();
  }
  // Convert the partition filter map into Catalyst filter expressions.
  List<org.apache.spark.sql.catalyst.expressions.Expression> filterExpressions =
      SparkUtil.partitionMapToExpression(schema, partitionFilter);
  Seq<org.apache.spark.sql.catalyst.expressions.Expression> scalaPartitionFilters =
      JavaConverters.asScalaBufferConverter(filterExpressions).asScala().toIndexedSeq();
  List<org.apache.spark.sql.catalyst.expressions.Expression> dataFilters = Lists.newArrayList();
  Seq<org.apache.spark.sql.catalyst.expressions.Expression> scalaDataFilters =
      JavaConverters.asScalaBufferConverter(dataFilters).asScala().toIndexedSeq();
  Seq<PartitionDirectory> filteredPartitions =
      fileIndex.listFiles(scalaPartitionFilters, scalaDataFilters).toIndexedSeq();
  return JavaConverters.seqAsJavaListConverter(filteredPartitions).asJava().stream()
      .map(partition -> {
        // Recover the partition values as strings, keyed by partition column name.
        Map<String, String> values = Maps.newHashMap();
        JavaConverters.asJavaIterableConverter(schema).asJava().forEach(field -> {
          int fieldIndex = schema.fieldIndex(field.name());
          Object catalystValue = partition.values().get(fieldIndex, field.dataType());
          Object value = CatalystTypeConverters.convertToScala(catalystValue, field.dataType());
          values.put(field.name(), String.valueOf(value));
        });
        // All files in a PartitionDirectory share a parent directory, so the
        // first file's parent is the partition location.
        FileStatus fileStatus =
            JavaConverters.seqAsJavaListConverter(partition.files()).asJava().get(0);
        return new SparkPartition(values, fileStatus.getPath().getParent().toString(), format);
      })
      .collect(Collectors.toList());
}
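A hypothetical call, assuming a Parquet table rooted at hdfs://nn/warehouse/db/events with a dt partition column (the path, column, and filter value are illustrative):

Map<String, String> filter = ImmutableMap.of("dt", "2021-01-01");
List<SparkPartition> partitions =
    Spark3Util.getPartitions(spark, new Path("hdfs://nn/warehouse/db/events"), "parquet", filter);
// Each SparkPartition carries the partition values, its location, and the format.
partitions.forEach(partition -> System.out.println(partition));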
Use of org.apache.spark.sql.connector.catalog.Table in project iceberg by apache.
The class SparkSessionCatalog, method stageReplace.
@Override
public StagedTable stageReplace(
    Identifier ident, StructType schema, Transform[] partitions, Map<String, String> properties)
    throws NoSuchNamespaceException, NoSuchTableException {
  String provider = properties.get("provider");
  TableCatalog catalog;
  if (useIceberg(provider)) {
    if (asStagingCatalog != null) {
      return asStagingCatalog.stageReplace(ident, schema, partitions, properties);
    }
    catalog = icebergCatalog;
  } else {
    catalog = getSessionCatalog();
  }

  // attempt to drop the table and fail if it doesn't exist
  if (!catalog.dropTable(ident)) {
    throw new NoSuchTableException(ident);
  }

  try {
    // create the table with the session catalog, then wrap it in a staged
    // table that will delete to roll back
    Table table = catalog.createTable(ident, schema, partitions, properties);
    return new RollbackStagedTable(catalog, ident, table);
  } catch (TableAlreadyExistsException e) {
    // the table was deleted, but now already exists again; retry the replace
    return stageReplace(ident, schema, partitions, properties);
  }
}
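For context, Spark drives a staged table roughly as follows (a simplified sketch of the StagedTable commit protocol, not the actual code of Spark's replace-table physical plans):

StagedTable staged = catalog.stageReplace(ident, schema, partitions, properties);
try {
  // ... write the replacement data through the staged table ...
  staged.commitStagedChanges(); // make the replacement visible
} catch (Exception e) {
  staged.abortStagedChanges(); // here, RollbackStagedTable drops the newly created table
  throw e;
}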
Use of org.apache.spark.sql.connector.catalog.Table in project iceberg by apache.
The class TestSparkCatalogOperations, method testAlterTable.
@Test
public void testAlterTable() throws NoSuchTableException {
  BaseCatalog catalog = (BaseCatalog) spark.sessionState().catalogManager().catalog(catalogName);
  Identifier identifier = Identifier.of(tableIdent.namespace().levels(), tableIdent.name());
  String fieldName = "location";
  String propsKey = "note";
  String propsValue = "jazz";
  Table table =
      catalog.alterTable(
          identifier,
          TableChange.addColumn(new String[] {fieldName}, DataTypes.StringType, true),
          TableChange.setProperty(propsKey, propsValue));
  Assert.assertNotNull("Should return updated table", table);

  StructField expectedField = DataTypes.createStructField(fieldName, DataTypes.StringType, true);
  Assert.assertEquals(
      "Adding a column to a table should return the updated table with the new column",
      expectedField,
      table.schema().fields()[2]);
  Assert.assertTrue(
      "Adding a property to a table should return the updated table with the new property",
      table.properties().containsKey(propsKey));
  Assert.assertEquals(
      "Altering a table to add a new property should add the correct value",
      propsValue,
      table.properties().get(propsKey));
}
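The same alterTable entry point accepts any mix of TableChange variants. A hedged follow-up sketch reusing the identifiers from the test above, renaming the added column and removing the property (the new column name "loc" and the assertion on field index 2 are assumptions, not part of the original test):

Table updated =
    catalog.alterTable(
        identifier,
        TableChange.renameColumn(new String[] {fieldName}, "loc"),
        TableChange.removeProperty(propsKey));
// Assumes the renamed column keeps its position at index 2.
Assert.assertEquals("loc", updated.schema().fields()[2].name());
Assert.assertFalse(updated.properties().containsKey(propsKey));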