Use of org.apache.spark.sql.connector.expressions.SortOrder in project Iceberg by Apache.
From the class SparkDistributionAndOrderingUtil, method buildPositionMergeDistribution.
private static Distribution buildPositionMergeDistribution(Table table, DistributionMode distributionMode) {
  switch (distributionMode) {
    case NONE:
      return Distributions.unspecified();

    case HASH:
      if (table.spec().isUnpartitioned()) {
        // unpartitioned tables have no data clustering, so cluster deletes by file
        Expression[] clustering = new Expression[] {SPEC_ID, PARTITION, FILE_PATH};
        return Distributions.clustered(clustering);
      } else {
        // prepend the delete metadata columns to the table's data clustering
        Distribution dataDistribution = buildRequiredDistribution(table, distributionMode);
        Expression[] dataClustering = ((ClusteredDistribution) dataDistribution).clustering();
        Expression[] deleteClustering = new Expression[] {SPEC_ID, PARTITION};
        Expression[] clustering = ObjectArrays.concat(deleteClustering, dataClustering, Expression.class);
        return Distributions.clustered(clustering);
      }

    case RANGE:
      // prepend the delete metadata sort orders to the table's data ordering
      Distribution dataDistribution = buildRequiredDistribution(table, distributionMode);
      SortOrder[] dataOrdering = ((OrderedDistribution) dataDistribution).ordering();
      SortOrder[] deleteOrdering = new SortOrder[] {SPEC_ID_ORDER, PARTITION_ORDER, FILE_PATH_ORDER};
      SortOrder[] ordering = ObjectArrays.concat(deleteOrdering, dataOrdering, SortOrder.class);
      return Distributions.ordered(ordering);

    default:
      throw new IllegalArgumentException("Unexpected distribution mode: " + distributionMode);
  }
}
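The SPEC_ID, PARTITION, and FILE_PATH expressions and the corresponding *_ORDER sort orders referenced above are constants defined elsewhere in the class and are not shown in this snippet. A minimal sketch of plausible definitions, assuming they are built with the same Expressions factory and MetadataColumns names that the tests below use:

// Sketch of assumed constants: clustering expressions over Iceberg's
// reserved metadata columns, plus ascending sort orders over the same
// columns (matching the orderings asserted in the tests below).
private static final NamedReference SPEC_ID = Expressions.column(MetadataColumns.SPEC_ID.name());
private static final NamedReference PARTITION = Expressions.column(MetadataColumns.PARTITION_COLUMN_NAME);
private static final NamedReference FILE_PATH = Expressions.column(MetadataColumns.FILE_PATH.name());

private static final SortOrder SPEC_ID_ORDER = Expressions.sort(SPEC_ID, SortDirection.ASCENDING);
private static final SortOrder PARTITION_ORDER = Expressions.sort(PARTITION, SortDirection.ASCENDING);
private static final SortOrder FILE_PATH_ORDER = Expressions.sort(FILE_PATH, SortDirection.ASCENDING);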
Use of org.apache.spark.sql.connector.expressions.SortOrder in project Iceberg by Apache.
From the class TestSparkDistributionAndOrderingUtil, method testHashCopyOnWriteUpdateUnpartitionedSortedTable.
@Test
public void testHashCopyOnWriteUpdateUnpartitionedSortedTable() {
  sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName);

  Table table = validationCatalog.loadTable(tableIdent);

  table.updateProperties().set(UPDATE_DISTRIBUTION_MODE, WRITE_DISTRIBUTION_MODE_HASH).commit();

  table.replaceSortOrder().asc("id").asc("data").commit();

  SortOrder[] expectedOrdering = new SortOrder[] {
      Expressions.sort(Expressions.column("id"), SortDirection.ASCENDING),
      Expressions.sort(Expressions.column("data"), SortDirection.ASCENDING)
  };

  checkCopyOnWriteDistributionAndOrdering(table, UPDATE, FILE_CLUSTERED_DISTRIBUTION, expectedOrdering);
}
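FILE_CLUSTERED_DISTRIBUTION is a helper constant of the test class that is not shown in these snippets. A plausible sketch, assuming copy-on-write rewrites are clustered by the file path metadata column so that each task rewrites whole files:

// Sketch of an assumed test-helper constant: cluster copy-on-write
// rewrites by the file path metadata column.
private static final Distribution FILE_CLUSTERED_DISTRIBUTION =
    Distributions.clustered(new Expression[] {Expressions.column(MetadataColumns.FILE_PATH.name())});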
Use of org.apache.spark.sql.connector.expressions.SortOrder in project Iceberg by Apache.
From the class TestSparkDistributionAndOrderingUtil, method testDefaultCopyOnWriteDeleteUnpartitionedSortedTable.
@Test
public void testDefaultCopyOnWriteDeleteUnpartitionedSortedTable() {
  sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName);

  Table table = validationCatalog.loadTable(tableIdent);

  table.replaceSortOrder().asc("id").asc("data").commit();

  SortOrder[] expectedOrdering = new SortOrder[] {
      Expressions.sort(Expressions.column("id"), SortDirection.ASCENDING),
      Expressions.sort(Expressions.column("data"), SortDirection.ASCENDING)
  };

  checkCopyOnWriteDistributionAndOrdering(table, DELETE, FILE_CLUSTERED_DISTRIBUTION, expectedOrdering);
}
Use of org.apache.spark.sql.connector.expressions.SortOrder in project Iceberg by Apache.
From the class TestSparkDistributionAndOrderingUtil, method testNonePositionDeltaMergePartitionedSortedTable.
@Test
public void testNonePositionDeltaMergePartitionedSortedTable() {
  sql("CREATE TABLE %s (id BIGINT, data STRING, date DATE, ts TIMESTAMP) " +
      "USING iceberg " +
      "PARTITIONED BY (date)", tableName);

  Table table = validationCatalog.loadTable(tableIdent);

  table.updateProperties().set(MERGE_DISTRIBUTION_MODE, WRITE_DISTRIBUTION_MODE_NONE).commit();

  table.replaceSortOrder().desc("id").commit();

  SortOrder[] expectedOrdering = new SortOrder[] {
      Expressions.sort(Expressions.column(MetadataColumns.SPEC_ID.name()), SortDirection.ASCENDING),
      Expressions.sort(Expressions.column(MetadataColumns.PARTITION_COLUMN_NAME), SortDirection.ASCENDING),
      Expressions.sort(Expressions.column(MetadataColumns.FILE_PATH.name()), SortDirection.ASCENDING),
      Expressions.sort(Expressions.column(MetadataColumns.ROW_POSITION.name()), SortDirection.ASCENDING),
      Expressions.sort(Expressions.column("date"), SortDirection.ASCENDING),
      Expressions.sort(Expressions.column("id"), SortDirection.DESCENDING)
  };

  checkPositionDeltaDistributionAndOrdering(table, MERGE, UNSPECIFIED_DISTRIBUTION, expectedOrdering);
}
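UNSPECIFIED_DISTRIBUTION is likewise a helper constant not shown here; since DistributionMode.NONE imposes no clustering requirement, a matching sketch is simply:

// Sketch of an assumed test-helper constant: no distribution requirement,
// mirroring the Distributions.unspecified() returned for DistributionMode.NONE.
private static final Distribution UNSPECIFIED_DISTRIBUTION = Distributions.unspecified();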
Use of org.apache.spark.sql.connector.expressions.SortOrder in project Iceberg by Apache.
From the class TestSparkDistributionAndOrderingUtil, method testRangeCopyOnWriteDeleteUnpartitionedSortedTable.
@Test
public void testRangeCopyOnWriteDeleteUnpartitionedSortedTable() {
  sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName);

  Table table = validationCatalog.loadTable(tableIdent);

  table.updateProperties().set(DELETE_DISTRIBUTION_MODE, WRITE_DISTRIBUTION_MODE_RANGE).commit();

  table.replaceSortOrder().asc("id").asc("data").commit();

  SortOrder[] expectedOrdering = new SortOrder[] {
      Expressions.sort(Expressions.column("id"), SortDirection.ASCENDING),
      Expressions.sort(Expressions.column("data"), SortDirection.ASCENDING)
  };

  Distribution expectedDistribution = Distributions.ordered(expectedOrdering);

  checkCopyOnWriteDistributionAndOrdering(table, DELETE, expectedDistribution, expectedOrdering);
}
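For context, a Distribution and SortOrder[] pair like the ones checked above is what a Spark DSv2 write reports back to the engine through the RequiresDistributionAndOrdering interface. A minimal illustrative sketch; the class name and constructor are hypothetical, not Iceberg's actual write implementation:

import org.apache.spark.sql.connector.distributions.Distribution;
import org.apache.spark.sql.connector.expressions.SortOrder;
import org.apache.spark.sql.connector.write.RequiresDistributionAndOrdering;

// Hypothetical write that advertises a required distribution and ordering,
// such as the values produced by buildPositionMergeDistribution above.
class SketchWrite implements RequiresDistributionAndOrdering {
  private final Distribution distribution;
  private final SortOrder[] ordering;

  SketchWrite(Distribution distribution, SortOrder[] ordering) {
    this.distribution = distribution;
    this.ordering = ordering;
  }

  @Override
  public Distribution requiredDistribution() {
    return distribution;
  }

  @Override
  public SortOrder[] requiredOrdering() {
    return ordering;
  }
}

Spark uses these declarations to insert a shuffle and/or local sort before invoking the write, which is exactly what the checkCopyOnWriteDistributionAndOrdering and checkPositionDeltaDistributionAndOrdering assertions above verify.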