Usage example of org.apache.spark.sql.connector.expressions.NamedReference in the Apache Iceberg project: class SparkCopyOnWriteOperation, method requiredMetadataAttributes.
@Override
public NamedReference[] requiredMetadataAttributes() {
  // Copy-on-write rewrites whole files, so every command needs the source
  // file path; DELETE and UPDATE additionally need the row position to
  // identify which rows within the file are affected.
  NamedReference filePath = Expressions.column(MetadataColumns.FILE_PATH.name());
  NamedReference rowPosition = Expressions.column(MetadataColumns.ROW_POSITION.name());
  boolean needsRowPosition = command == DELETE || command == UPDATE;
  return needsRowPosition
      ? new NamedReference[] { filePath, rowPosition }
      : new NamedReference[] { filePath };
}
Usage example of org.apache.spark.sql.connector.expressions.NamedReference in the Apache Iceberg project: class Spark3Util, method toIcebergTerm.
/**
 * Converts a Spark DSv2 expression into an equivalent Iceberg {@link Term}.
 *
 * <p>Supports plain column references and single-column partition transforms
 * (identity, bucket, years/months/days/hours, truncate).
 *
 * @param expr a Spark connector expression
 * @return the corresponding Iceberg term
 * @throws UnsupportedOperationException if the expression or transform is not supported
 */
public static Term toIcebergTerm(Expression expr) {
  // A bare column reference maps directly to an Iceberg named reference.
  if (expr instanceof NamedReference) {
    NamedReference ref = (NamedReference) expr;
    return org.apache.iceberg.expressions.Expressions.ref(DOT.join(ref.fieldNames()));
  }

  if (!(expr instanceof Transform)) {
    throw new UnsupportedOperationException("Cannot convert unknown expression: " + expr);
  }

  Transform transform = (Transform) expr;
  // Iceberg transforms are defined over exactly one source column.
  Preconditions.checkArgument(
      transform.references().length == 1,
      "Cannot convert transform with more than one column reference: %s",
      transform);
  String colName = DOT.join(transform.references()[0].fieldNames());

  switch (transform.name()) {
    case "identity":
      return org.apache.iceberg.expressions.Expressions.ref(colName);
    case "bucket":
      return org.apache.iceberg.expressions.Expressions.bucket(colName, findWidth(transform));
    case "years":
      return org.apache.iceberg.expressions.Expressions.year(colName);
    case "months":
      return org.apache.iceberg.expressions.Expressions.month(colName);
    case "date": // Spark's "date" transform is an alias for Iceberg's day transform
    case "days":
      return org.apache.iceberg.expressions.Expressions.day(colName);
    case "date_hour": // Spark's "date_hour" transform is an alias for Iceberg's hour transform
    case "hours":
      return org.apache.iceberg.expressions.Expressions.hour(colName);
    case "truncate":
      return org.apache.iceberg.expressions.Expressions.truncate(colName, findWidth(transform));
    default:
      throw new UnsupportedOperationException("Transform is not supported: " + transform);
  }
}
Usage example of org.apache.spark.sql.connector.expressions.NamedReference in the Apache Iceberg project: class SparkBatchQueryScan, method filterAttributes.
@Override
public NamedReference[] filterAttributes() {
  // Collect the source column ids of every partition field across all of the
  // partition specs that contributed files to this scan.
  Set<Integer> partitionFieldSourceIds = Sets.newHashSet();
  for (Integer specId : specIds()) {
    PartitionSpec spec = table().specs().get(specId);
    for (PartitionField field : spec.fields()) {
      partitionFieldSourceIds.add(field.sourceId());
    }
  }

  Map<Integer, String> quotedNameById = SparkSchemaUtil.indexQuotedNameById(expectedSchema());

  // Report to Spark only the source columns that are still present in the
  // projected schema, as quoted column references.
  return partitionFieldSourceIds.stream()
      .filter(fieldId -> expectedSchema().findField(fieldId) != null)
      .map(quotedNameById::get)
      .map(Spark3Util::toNamedReference)
      .toArray(NamedReference[]::new);
}
Usage example of org.apache.spark.sql.connector.expressions.NamedReference in the Apache Iceberg project: class SparkPositionDeltaOperation, method rowId.
@Override
public NamedReference[] rowId() {
  // A row is identified for position deltas by the (file path, row position)
  // pair of metadata columns.
  return new NamedReference[] {
      Expressions.column(MetadataColumns.FILE_PATH.name()),
      Expressions.column(MetadataColumns.ROW_POSITION.name())
  };
}
Usage example of org.apache.spark.sql.connector.expressions.NamedReference in the Apache Iceberg project: class SparkPositionDeltaOperation, method requiredMetadataAttributes.
@Override
public NamedReference[] requiredMetadataAttributes() {
  // Position delta writes need the spec id and partition metadata columns so
  // that each delta can be routed back to the correct partition layout.
  return new NamedReference[] {
      Expressions.column(MetadataColumns.SPEC_ID.name()),
      Expressions.column(MetadataColumns.PARTITION_COLUMN_NAME)
  };
}
Aggregations