Use of org.apache.spark.sql.catalyst.catalog.CatalogTable in project iceberg by apache.
The class SparkTableUtil, method getPartitions.
/**
 * Returns all partitions in the table.
 *
 * @param spark a Spark session
 * @param tableIdent a table identifier
 * @param partitionFilter a partition filter, or null for no filtering
 * @return all of the table's partitions
 */
public static List<SparkPartition> getPartitions(
    SparkSession spark, TableIdentifier tableIdent, Map<String, String> partitionFilter) {
  try {
    SessionCatalog catalog = spark.sessionState().catalog();
    CatalogTable catalogTable = catalog.getTableMetadata(tableIdent);

    // Convert the Java filter map into the immutable Scala Map the catalog expects.
    Option<scala.collection.immutable.Map<String, String>> scalaPartitionFilter;
    if (partitionFilter != null && !partitionFilter.isEmpty()) {
      Builder<Tuple2<String, String>, scala.collection.immutable.Map<String, String>> builder =
          Map$.MODULE$.<String, String>newBuilder();
      partitionFilter.forEach((key, value) -> builder.$plus$eq(Tuple2.apply(key, value)));
      scalaPartitionFilter = Option.apply(builder.result());
    } else {
      scalaPartitionFilter = Option.empty();
    }

    Seq<CatalogTablePartition> partitions =
        catalog.listPartitions(tableIdent, scalaPartitionFilter).toIndexedSeq();
    return JavaConverters.seqAsJavaListConverter(partitions).asJava().stream()
        .map(catalogPartition -> toSparkPartition(catalogPartition, catalogTable))
        .collect(Collectors.toList());
  } catch (NoSuchDatabaseException e) {
    throw SparkExceptionUtil.toUncheckedException(
        e, "Unknown table: %s. Database not found in catalog.", tableIdent);
  } catch (NoSuchTableException e) {
    throw SparkExceptionUtil.toUncheckedException(
        e, "Unknown table: %s. Table not found in catalog.", tableIdent);
  }
}
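For orientation, here is a minimal sketch of a call site, assuming a Hive-backed session catalog and a hypothetical table db.logs partitioned by dt; the table name and filter value are illustrative and not part of the Iceberg source.

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.iceberg.spark.SparkTableUtil;
import org.apache.iceberg.spark.SparkTableUtil.SparkPartition;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.TableIdentifier;
import scala.Option;

public class ListPartitionsExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .enableHiveSupport() // session-catalog partitions normally live in a Hive metastore
        .getOrCreate();

    // Hypothetical table db.logs, partitioned by dt; restrict to a single day.
    TableIdentifier ident = new TableIdentifier("logs", Option.apply("db"));
    Map<String, String> filter = new HashMap<>();
    filter.put("dt", "2021-01-01");

    List<SparkPartition> partitions = SparkTableUtil.getPartitions(spark, ident, filter);
    partitions.forEach(System.out::println);
  }
}

Passing null (or an empty map) as the filter takes the Option.empty() branch above and lists every partition of the table.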
Use of org.apache.spark.sql.catalyst.catalog.CatalogTable in project iceberg by apache.
The class SparkTableUtil, method getPartitionsByFilter.
/**
 * Returns the partitions that match the specified predicate expression.
 *
 * @param spark a Spark session
 * @param tableIdent a table identifier
 * @param predicateExpr a predicate expression on partition columns
 * @return the matching partitions
 */
public static List<SparkPartition> getPartitionsByFilter(
    SparkSession spark, TableIdentifier tableIdent, Expression predicateExpr) {
  try {
    SessionCatalog catalog = spark.sessionState().catalog();
    CatalogTable catalogTable = catalog.getTableMetadata(tableIdent);

    // Resolve attribute references in the predicate against the table, if needed.
    Expression resolvedPredicateExpr;
    if (!predicateExpr.resolved()) {
      resolvedPredicateExpr = resolveAttrs(spark, tableIdent.quotedString(), predicateExpr);
    } else {
      resolvedPredicateExpr = predicateExpr;
    }

    Seq<Expression> predicates =
        JavaConverters.collectionAsScalaIterableConverter(ImmutableList.of(resolvedPredicateExpr))
            .asScala().toIndexedSeq();
    Seq<CatalogTablePartition> partitions =
        catalog.listPartitionsByFilter(tableIdent, predicates).toIndexedSeq();
    return JavaConverters.seqAsJavaListConverter(partitions).asJava().stream()
        .map(catalogPartition -> toSparkPartition(catalogPartition, catalogTable))
        .collect(Collectors.toList());
  } catch (NoSuchDatabaseException e) {
    throw SparkExceptionUtil.toUncheckedException(
        e, "Unknown table: %s. Database not found in catalog.", tableIdent);
  } catch (NoSuchTableException e) {
    throw SparkExceptionUtil.toUncheckedException(
        e, "Unknown table: %s. Table not found in catalog.", tableIdent);
  }
}
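A sketch of how a predicate might be produced and passed in: Spark's SQL parser returns an unresolved expression, which exercises the resolveAttrs branch above. The table and column names are hypothetical.

import java.util.List;
import org.apache.iceberg.spark.SparkTableUtil;
import org.apache.iceberg.spark.SparkTableUtil.SparkPartition;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.TableIdentifier;
import org.apache.spark.sql.catalyst.expressions.Expression;
import scala.Option;

public class FilterPartitionsExample {
  public static void main(String[] args) throws Exception {
    SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .enableHiveSupport()
        .getOrCreate();

    // Parse a predicate over the hypothetical partition column dt. The parsed
    // expression is unresolved, so getPartitionsByFilter resolves its attribute
    // references against db.logs before handing it to the session catalog.
    Expression predicate =
        spark.sessionState().sqlParser().parseExpression("dt >= '2021-01-01'");

    List<SparkPartition> partitions = SparkTableUtil.getPartitionsByFilter(
        spark, new TableIdentifier("logs", Option.apply("db")), predicate);
    partitions.forEach(System.out::println);
  }
}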
Use of org.apache.spark.sql.catalyst.catalog.CatalogTable in project iceberg by apache.
The class TestCreateActions, method expectedFilesCount.
private long expectedFilesCount(String source)
    throws NoSuchDatabaseException, NoSuchTableException, ParseException {
  CatalogTable sourceTable = loadSessionTable(source);
  List<URI> uris;
  if (sourceTable.partitionColumnNames().size() == 0) {
    // Unpartitioned table: all files live under the table's root location.
    uris = Lists.newArrayList();
    uris.add(sourceTable.location());
  } else {
    // Partitioned table: collect the location of every partition.
    Seq<CatalogTablePartition> catalogTablePartitionSeq =
        spark.sessionState().catalog().listPartitions(sourceTable.identifier(), Option.apply(null));
    uris = JavaConverters.seqAsJavaList(catalogTablePartitionSeq).stream()
        .map(CatalogTablePartition::location)
        .collect(Collectors.toList());
  }
  // Count files recursively, skipping checksum and _SUCCESS marker files.
  return uris.stream()
      .flatMap(uri -> FileUtils.listFiles(
          Paths.get(uri).toFile(), TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).stream())
      .filter(file -> !file.toString().endsWith("crc") && !file.toString().contains("_SUCCESS"))
      .count();
}
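The loadSessionTable helper is not part of this excerpt. Below is a plausible reconstruction, assuming it simply parses the qualified name and asks the session catalog for the table's metadata; the real test helper may differ.

// Assumed reconstruction of the helper used above; not the actual Iceberg test code.
private CatalogTable loadSessionTable(String name)
    throws NoSuchDatabaseException, NoSuchTableException, ParseException {
  // parseTableIdentifier splits a qualified name like "db.table" into a TableIdentifier.
  TableIdentifier ident = spark.sessionState().sqlParser().parseTableIdentifier(name);
  return spark.sessionState().catalog().getTableMetadata(ident);
}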
Use of org.apache.spark.sql.catalyst.catalog.CatalogTable in project iceberg by apache.
The class TestCreateActions, method createSourceTable.
private void createSourceTable(String createStatement, String tableName)
    throws IOException, NoSuchTableException, NoSuchDatabaseException, ParseException {
  File location = temp.newFolder();
  spark.sql(String.format(createStatement, tableName, location));
  CatalogTable table = loadSessionTable(tableName);
  Seq<String> partitionColumns = table.partitionColumnNames();
  String format = table.provider().get();
  // Populate the new table from the base table, preserving its format and partitioning.
  spark.table(baseTableName).write().mode(SaveMode.Append).format(format)
      .partitionBy(partitionColumns.toSeq()).saveAsTable(tableName);
}
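A hypothetical caller, showing the shape of DDL the format string is expected to carry: two %s placeholders, one for the table name and one for the location. The schema, format, and table name below are illustrative, not taken from the Iceberg tests.

// Hypothetical invocation from a test method in the same class. createSourceTable
// formats the table name and a fresh temp folder into the two %s placeholders,
// then appends baseTableName's rows using the same file format and partitioning.
createSourceTable(
    "CREATE TABLE %s (id bigint, data string, dt string) "
        + "USING parquet PARTITIONED BY (dt) LOCATION '%s'",
    "default.source_partitioned");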
Use of org.apache.spark.sql.catalyst.catalog.CatalogTable in project OpenLineage by OpenLineage.
The class AlterTableAddColumnsCommandVisitor, method apply.
@Override
public List<OpenLineage.OutputDataset> apply(LogicalPlan x) {
  AlterTableAddColumnsCommand command = (AlterTableAddColumnsCommand) x;
  Optional<CatalogTable> tableOption = catalogTableFor(command.table());
  if (!tableOption.isPresent()) {
    return Collections.emptyList();
  }

  CatalogTable catalogTable = tableOption.get();
  List<StructField> tableColumns = Arrays.asList(catalogTable.schema().fields());
  List<StructField> addedColumns = JavaConversions.seqAsJavaList(command.colsToAdd());

  if (tableColumns.containsAll(addedColumns)) {
    return Collections.singletonList(outputDataset().getDataset(
        PathUtils.fromCatalogTable(catalogTable), catalogTable.schema()));
  } else {
    // apply() ran before the columns were actually added; do not emit an event.
    return Collections.emptyList();
  }
}
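catalogTableFor comes from the visitor's base class and is not shown in this excerpt. A rough sketch of its presumed behavior follows, looking the identifier up in the session catalog and mapping lookup failures to an empty Optional; this is an assumption, not OpenLineage's actual implementation.

// Assumed sketch, not the actual OpenLineage helper.
// sparkSession: assumed field on the visitor holding the active SparkSession.
private Optional<CatalogTable> catalogTableFor(TableIdentifier tableId) {
  try {
    return Optional.of(sparkSession.sessionState().catalog().getTableMetadata(tableId));
  } catch (Exception e) {
    // Table not (yet) known to the catalog: report no dataset.
    return Optional.empty();
  }
}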