Usage of org.apache.spark.sql.connector.expressions.Transform in the Apache Iceberg project: class Spark3Util, method toPartitionSpec.
/**
 * Converts Spark transforms into a {@link PartitionSpec}.
 *
 * @param schema the table schema
 * @param partitioning Spark Transforms
 * @return a PartitionSpec
 */
public static PartitionSpec toPartitionSpec(Schema schema, Transform[] partitioning) {
  // No transforms means the table is unpartitioned.
  if (partitioning == null || partitioning.length == 0) {
    return PartitionSpec.unpartitioned();
  }

  PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema);
  for (Transform sparkTransform : partitioning) {
    // Iceberg partition transforms operate on exactly one source column.
    Preconditions.checkArgument(sparkTransform.references().length == 1,
        "Cannot convert transform with more than one column reference: %s", sparkTransform);
    String sourceName = DOT.join(sparkTransform.references()[0].fieldNames());
    String transformName = sparkTransform.name();

    if ("identity".equals(transformName)) {
      specBuilder.identity(sourceName);
    } else if ("bucket".equals(transformName)) {
      specBuilder.bucket(sourceName, findWidth(sparkTransform));
    } else if ("years".equals(transformName)) {
      specBuilder.year(sourceName);
    } else if ("months".equals(transformName)) {
      specBuilder.month(sourceName);
    } else if ("date".equals(transformName) || "days".equals(transformName)) {
      // Spark names the daily transform either "date" or "days".
      specBuilder.day(sourceName);
    } else if ("date_hour".equals(transformName) || "hours".equals(transformName)) {
      specBuilder.hour(sourceName);
    } else if ("truncate".equals(transformName)) {
      specBuilder.truncate(sourceName, findWidth(sparkTransform));
    } else {
      throw new UnsupportedOperationException("Transform is not supported: " + sparkTransform);
    }
  }

  return specBuilder.build();
}
Usage of org.apache.spark.sql.connector.expressions.Transform in the Apache Iceberg project: class BaseTableCreationSparkAction, method stageDestTable.
/**
 * Stages (without committing) the destination table in the destination catalog, using the
 * source table's schema and partitioning together with the computed destination properties.
 *
 * @return the staged destination table
 * @throws NoSuchNamespaceException if the destination namespace does not exist
 * @throws AlreadyExistsException if the destination table already exists
 */
protected StagedSparkTable stageDestTable() {
  try {
    Map<String, String> props = destTableProps();
    StructType schema = sourceTable.schema();
    Transform[] partitioning = sourceTable.partitioning();
    return (StagedSparkTable) destCatalog().stageCreate(destTableIdent(), schema, partitioning, props);
  } catch (org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException e) {
    // pass the Spark exception as the cause so the original stack trace is not lost
    throw new NoSuchNamespaceException(e, "Cannot create table %s as the namespace does not exist", destTableIdent());
  } catch (org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException e) {
    // pass the Spark exception as the cause so the original stack trace is not lost
    throw new AlreadyExistsException(e, "Cannot create table %s as it already exists", destTableIdent());
  }
}
Usage of org.apache.spark.sql.connector.expressions.Transform in the Apache Iceberg project: class TestRemoveOrphanFilesAction3, method testSparkCatalogNamedHiveTable.
@Test
public void testSparkCatalogNamedHiveTable() throws Exception {
  // Register a Hadoop-backed SparkCatalog that is deliberately *named* "hive".
  spark.conf().set("spark.sql.catalog.hive", "org.apache.iceberg.spark.SparkCatalog");
  spark.conf().set("spark.sql.catalog.hive.type", "hadoop");
  spark.conf().set("spark.sql.catalog.hive.warehouse", tableLocation);
  SparkCatalog sparkCatalog = (SparkCatalog) spark.sessionState().catalogManager().catalog("hive");

  // Create an unpartitioned table and write a single row to it.
  Identifier tableIdent = Identifier.of(new String[] { "default" }, "table");
  sparkCatalog.createTable(tableIdent, SparkSchemaUtil.convert(SCHEMA), new Transform[0], Maps.newHashMap());
  SparkTable sparkTable = sparkCatalog.loadTable(tableIdent);
  spark.sql("INSERT INTO hive.default.table VALUES (1,1,1)");

  // Drop an untracked file into the table's data directory.
  String tableDir = sparkTable.table().location().replaceFirst("file:", "");
  new File(tableDir + "/data/trashfile").createNewFile();

  // Remove orphans newer than "now + 1s" so the trash file qualifies.
  DeleteOrphanFiles.Result result = SparkActions.get()
      .deleteOrphanFiles(sparkTable.table())
      .olderThan(System.currentTimeMillis() + 1000)
      .execute();
  boolean trashRemoved = StreamSupport.stream(result.orphanFileLocations().spliterator(), false)
      .anyMatch(path -> path.contains("file:" + tableDir + "/data/trashfile"));
  Assert.assertTrue("trash file should be removed", trashRemoved);
}
Usage of org.apache.spark.sql.connector.expressions.Transform in the Apache Iceberg project: class TestRemoveOrphanFilesAction3, method testSparkSessionCatalogHiveTable.
@Test
public void testSparkSessionCatalogHiveTable() throws Exception {
  // Route the built-in session catalog through Iceberg's SparkSessionCatalog (hive type).
  spark.conf().set("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog");
  spark.conf().set("spark.sql.catalog.spark_catalog.type", "hive");
  SparkSessionCatalog sessionCatalog =
      (SparkSessionCatalog) spark.sessionState().catalogManager().v2SessionCatalog();

  // Recreate the table from scratch, then write a single row to it.
  Identifier tableIdent = Identifier.of(new String[] { "default" }, "sessioncattest");
  sessionCatalog.dropTable(tableIdent);
  sessionCatalog.createTable(tableIdent, SparkSchemaUtil.convert(SCHEMA), new Transform[0], Maps.newHashMap());
  SparkTable sparkTable = (SparkTable) sessionCatalog.loadTable(tableIdent);
  spark.sql("INSERT INTO default.sessioncattest VALUES (1,1,1)");

  // Drop an untracked file into the table's data directory.
  String tableDir = sparkTable.table().location().replaceFirst("file:", "");
  new File(tableDir + "/data/trashfile").createNewFile();

  // Remove orphans newer than "now + 1s" so the trash file qualifies.
  DeleteOrphanFiles.Result result = SparkActions.get()
      .deleteOrphanFiles(sparkTable.table())
      .olderThan(System.currentTimeMillis() + 1000)
      .execute();
  boolean trashRemoved = StreamSupport.stream(result.orphanFileLocations().spliterator(), false)
      .anyMatch(path -> path.contains("file:" + tableDir + "/data/trashfile"));
  Assert.assertTrue("trash file should be removed", trashRemoved);
}
Usage of org.apache.spark.sql.connector.expressions.Transform in the Apache Iceberg project: class Spark3Util, method toIcebergTerm.
/**
 * Converts a Spark {@link Expression} into an equivalent Iceberg {@link Term}.
 *
 * <p>Supported inputs are single-column Spark transforms (identity, bucket, years, months,
 * date/days, date_hour/hours, truncate) and plain column references.
 *
 * @param expr a Spark expression
 * @return the corresponding Iceberg term
 * @throws UnsupportedOperationException if the expression cannot be converted
 */
public static Term toIcebergTerm(Expression expr) {
  if (expr instanceof Transform) {
    Transform sparkTransform = (Transform) expr;
    // Iceberg terms reference exactly one source column.
    Preconditions.checkArgument(sparkTransform.references().length == 1,
        "Cannot convert transform with more than one column reference: %s", sparkTransform);
    String sourceName = DOT.join(sparkTransform.references()[0].fieldNames());
    String transformName = sparkTransform.name();

    if ("identity".equals(transformName)) {
      return org.apache.iceberg.expressions.Expressions.ref(sourceName);
    } else if ("bucket".equals(transformName)) {
      return org.apache.iceberg.expressions.Expressions.bucket(sourceName, findWidth(sparkTransform));
    } else if ("years".equals(transformName)) {
      return org.apache.iceberg.expressions.Expressions.year(sourceName);
    } else if ("months".equals(transformName)) {
      return org.apache.iceberg.expressions.Expressions.month(sourceName);
    } else if ("date".equals(transformName) || "days".equals(transformName)) {
      // Spark names the daily transform either "date" or "days".
      return org.apache.iceberg.expressions.Expressions.day(sourceName);
    } else if ("date_hour".equals(transformName) || "hours".equals(transformName)) {
      return org.apache.iceberg.expressions.Expressions.hour(sourceName);
    } else if ("truncate".equals(transformName)) {
      return org.apache.iceberg.expressions.Expressions.truncate(sourceName, findWidth(sparkTransform));
    } else {
      throw new UnsupportedOperationException("Transform is not supported: " + sparkTransform);
    }
  }

  if (expr instanceof NamedReference) {
    NamedReference fieldRef = (NamedReference) expr;
    return org.apache.iceberg.expressions.Expressions.ref(DOT.join(fieldRef.fieldNames()));
  }

  throw new UnsupportedOperationException("Cannot convert unknown expression: " + expr);
}
Aggregations