Use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project iceberg by apache.
The class SparkTableUtil, method resolveAttrs:
private static Expression resolveAttrs(SparkSession spark, String table, Expression expr) {
  Function2<String, String, Object> resolver = spark.sessionState().analyzer().resolver();
  LogicalPlan plan = spark.table(table).queryExecution().analyzed();
  return expr.transform(
      new AbstractPartialFunction<Expression, Expression>() {
        @Override
        public Expression apply(Expression attr) {
          // isDefinedAt guarantees this cast is safe
          UnresolvedAttribute unresolvedAttribute = (UnresolvedAttribute) attr;
          Option<NamedExpression> namedExpressionOption =
              plan.resolve(unresolvedAttribute.nameParts(), resolver);
          if (namedExpressionOption.isDefined()) {
            return (Expression) namedExpressionOption.get();
          } else {
            throw new IllegalArgumentException(
                String.format("Could not resolve %s using columns: %s", attr, plan.output()));
          }
        }

        @Override
        public boolean isDefinedAt(Expression attr) {
          return attr instanceof UnresolvedAttribute;
        }
      });
}
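A minimal usage sketch, assuming a call from inside SparkTableUtil (the method is private) and a hypothetical table name. functions.expr(...).expr() yields a Catalyst expression whose attribute references start out unresolved:

// Hypothetical call site: parse a filter, then bind its attribute references
// against the analyzed plan of "db.sample_table" (an assumed table name).
Expression unresolved = org.apache.spark.sql.functions.expr("id = 1").expr();
Expression resolved = resolveAttrs(spark, "db.sample_table", unresolved);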
Use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project boostkit-bigdata by kunpengcompute.
The class DataIoAdapter, method createAggregationInfo:
private Optional<AggregationInfo> createAggregationInfo(
    List<AggregateFunction> aggregateFunctions, List<NamedExpression> namedExpressions) {
  List<RowExpression> groupingKeys = new ArrayList<>();
  Map<String, AggregationInfo.AggregateFunction> aggregationMap = new LinkedHashMap<>();
  boolean isEmpty = true;
  for (NamedExpression namedExpression : namedExpressions) {
    RowExpression groupingKey = extractNamedExpression((Expression) namedExpression);
    groupingKeys.add(groupingKey);
    isEmpty = false;
  }
  for (AggregateFunction aggregateFunction : aggregateFunctions) {
    extractAggregateFunction(aggregateFunction, aggregationMap);
    isEmpty = false;
  }
  return isEmpty ? Optional.empty() : Optional.of(new AggregationInfo(aggregationMap, groupingKeys));
}
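The mutable isEmpty flag only records whether either input list contributed anything; a hedged, behavior-equivalent formulation of that check:

// Equivalent to the flag above: the result is empty iff both inputs are empty.
boolean isEmpty = aggregateFunctions.isEmpty() && namedExpressions.isEmpty();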
Use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project boostkit-bigdata by kunpengcompute.
The class DataIoAdapter, method extractAggAndGroupExpression:
private Optional<AggregationInfo> extractAggAndGroupExpression(List<AggExeInfo> aggExecutionList) {
  Optional<AggregationInfo> resAggregationInfo = Optional.empty();
  for (AggExeInfo aggExeInfo : aggExecutionList) {
    List<AggregateFunction> aggregateExpressions =
        JavaConverters.seqAsJavaList(aggExeInfo.aggregateExpressions());
    List<NamedExpression> namedExpressions =
        JavaConverters.seqAsJavaList(aggExeInfo.groupingExpressions());
    // Note: each iteration overwrites the previous result, so only the last
    // AggExeInfo in the list contributes to the returned AggregationInfo.
    resAggregationInfo = createAggregationInfo(aggregateExpressions, namedExpressions);
  }
  return resAggregationInfo;
}
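Both methods above lean on the same Scala-to-Java bridge; a self-contained sketch of that conversion (the helper name is illustrative):

import java.util.List;
import scala.collection.JavaConverters;
import org.apache.spark.sql.catalyst.expressions.NamedExpression;

// Catalyst stores grouping expressions as a Scala Seq; Java callers must
// convert it explicitly before iterating with an enhanced for loop.
static List<NamedExpression> toJavaList(scala.collection.Seq<NamedExpression> groupingExpressions) {
  return JavaConverters.seqAsJavaList(groupingExpressions);
}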
Use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project boostkit-bigdata by kunpengcompute.
The class NdpUtils, method getColumnOffsetByAggExeInfo:
public static int getColumnOffsetByAggExeInfo(StructType dataSchema, Seq<AggExeInfo> aggExeInfo) {
  String columnName = "";
  int columnTempId = 0;
  if (aggExeInfo != null && aggExeInfo.size() > 0) {
    List<AggExeInfo> aggExecutionList = JavaConverters.seqAsJavaList(aggExeInfo);
    for (AggExeInfo aggExeInfoTemp : aggExecutionList) {
      List<AggregateFunction> aggregateExpressions =
          JavaConverters.seqAsJavaList(aggExeInfoTemp.aggregateExpressions());
      for (AggregateFunction aggregateFunction : aggregateExpressions) {
        // Catalyst renders attributes as "name#exprId"; recover both parts
        // from the first matching child of the first aggregate function only.
        List<Expression> expressions = JavaConverters.seqAsJavaList(aggregateFunction.children());
        for (Expression expression : expressions) {
          columnName = expression.toString().split("#")[0].replaceAll("\\(", "");
          Pattern pattern = Pattern.compile(columnName + "#(\\d+)");
          Matcher matcher = pattern.matcher(expression.toString());
          if (matcher.find()) {
            columnTempId = Integer.parseInt(matcher.group(1));
            break;
          }
        }
        break;
      }
      // Grouping expressions take precedence: the first one, if present,
      // overwrites the name and id extracted from the aggregate functions.
      List<NamedExpression> namedExpressions =
          JavaConverters.seqAsJavaList(aggExeInfoTemp.groupingExpressions());
      for (NamedExpression namedExpression : namedExpressions) {
        columnName = namedExpression.toString().split("#")[0];
        columnTempId = NdpUtils.getColumnId(namedExpression.toString());
        break;
      }
    }
  }
  // Map each schema column name to its ordinal position.
  Map<String, Integer> columnMap = new HashMap<>();
  scala.collection.Iterator<StructField> allTableSchemas = dataSchema.iterator();
  int dataSchemaColumnNum = 0;
  while (allTableSchemas.hasNext()) {
    StructField structField = allTableSchemas.next();
    columnMap.put(structField.name(), dataSchemaColumnNum++);
  }
  // Offset between the attribute's ExprId and its position in the data schema.
  int columnOffset = columnTempId - columnMap.getOrDefault(columnName, columnMap.size());
  return Math.abs(columnOffset);
}
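A standalone sketch of the "name#exprId" parsing performed above, assuming the child expression is a plain attribute reference that renders as, say, "price#12" (an illustrative name; the helper is hypothetical):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Recover the numeric ExprId from an attribute rendered as "name#exprId".
// Returns -1 if the rendered form does not match.
static int parseExprId(String rendered) {
  String columnName = rendered.split("#")[0].replaceAll("\\(", "");
  Matcher matcher = Pattern.compile(columnName + "#(\\d+)").matcher(rendered);
  return matcher.find() ? Integer.parseInt(matcher.group(1)) : -1;
}

// parseExprId("price#12") == 12, with "price" as the recovered column name.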
Use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project OpenLineage by OpenLineage.
The class UnknownEntryFacetListenerTest, method testBuildUnknownFacet:
@Test
void testBuildUnknownFacet() {
  UnknownEntryFacetListener underTest = new UnknownEntryFacetListener();
  NamedExpression reference = new AttributeReference(
      "test",
      DataType.fromDDL("`gender` STRING"),
      false,
      Metadata$.MODULE$.fromJson("{\"__CHAR_VARCHAR_TYPE_STRING\":\"varchar(64)\"}"),
      ExprId.apply(1L),
      Seq$.MODULE$.<String>newBuilder().result());
  ListFilesCommand logicalPlan =
      new ListFilesCommand(Seq$.MODULE$.<String>newBuilder().$plus$eq("./test.file").result());
  Project project = new Project(
      Seq$.MODULE$.<NamedExpression>newBuilder().$plus$eq(reference).result(), logicalPlan);

  UnknownEntryFacet facet = underTest.build(project).get();

  assertThat(facet.getOutput().getInputAttributes()).hasSize(1).first()
      .hasFieldOrPropertyWithValue("name", "Results")
      .hasFieldOrPropertyWithValue("type", "string");
  assertThat(facet.getOutput().getOutputAttributes()).hasSize(1).first()
      .hasFieldOrPropertyWithValue("name", "test")
      .hasFieldOrPropertyWithValue("type", "struct")
      .extracting("metadata").asInstanceOf(InstanceOfAssertFactories.MAP)
      .containsEntry("__CHAR_VARCHAR_TYPE_STRING", "varchar(64)");
  assertThat(facet.getInputs()).hasSize(1).first()
      .extracting("inputAttributes").asList().hasSize(0);
  assertThat(facet.getInputs()).hasSize(1).first()
      .extracting("outputAttributes").asList().hasSize(1).first()
      .hasFieldOrPropertyWithValue("name", "Results")
      .hasFieldOrPropertyWithValue("type", "string");
}
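The Seq$.MODULE$...$plus$eq chains above are the Java spelling of Scala's Seq builder; a hedged equivalent using JavaConverters (assuming Scala 2.12 collections, as the builder syntax suggests):

import java.util.Collections;
import scala.collection.JavaConverters;
import scala.collection.Seq;

// Build the same one-element Seq<String> from a Java list instead of the raw builder.
Seq<String> files =
    JavaConverters.asScalaBuffer(Collections.singletonList("./test.file")).toSeq();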