Search in sources :

Example 1 with NamedExpression

use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project iceberg by apache.

The method resolveAttrs of the class SparkTableUtil.

/**
 * Replaces every {@code UnresolvedAttribute} inside {@code expr} with the
 * corresponding named expression resolved against the analyzed logical plan
 * of {@code table}, using the session analyzer's resolver.
 *
 * @param spark the active Spark session
 * @param table name of the table whose analyzed plan supplies the columns
 * @param expr  expression tree that may contain unresolved attributes
 * @return the expression with all unresolved attributes resolved
 * @throws IllegalArgumentException if an attribute cannot be resolved
 *         against the plan's output columns
 */
private static Expression resolveAttrs(SparkSession spark, String table, Expression expr) {
    Function2<String, String, Object> nameResolver = spark.sessionState().analyzer().resolver();
    LogicalPlan analyzedPlan = spark.table(table).queryExecution().analyzed();
    // Partial function: defined only on UnresolvedAttribute nodes; transform
    // applies it bottom-up across the expression tree.
    return expr.transform(new AbstractPartialFunction<Expression, Expression>() {

        @Override
        public Expression apply(Expression attr) {
            UnresolvedAttribute unresolved = (UnresolvedAttribute) attr;
            Option<NamedExpression> resolved = analyzedPlan.resolve(unresolved.nameParts(), nameResolver);
            // Guard clause: fail fast with the available columns when resolution misses.
            if (resolved.isEmpty()) {
                throw new IllegalArgumentException(String.format("Could not resolve %s using columns: %s", attr, analyzedPlan.output()));
            }
            return (Expression) resolved.get();
        }

        @Override
        public boolean isDefinedAt(Expression attr) {
            return attr instanceof UnresolvedAttribute;
        }
    });
}
Also used : UnresolvedAttribute(org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute) Expression(org.apache.spark.sql.catalyst.expressions.Expression) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) LogicalPlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan) Option(scala.Option)

Example 2 with NamedExpression

use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project boostkit-bigdata by kunpengcompute.

The method createAggregationInfo of the class DataIoAdapter.

/**
 * Builds the pushed-down aggregation descriptor from the given aggregate
 * functions and grouping expressions.
 *
 * <p>Each grouping expression is converted into a {@code RowExpression}
 * grouping key, and each aggregate function is folded into
 * {@code aggregationMap} (keyed by name, insertion-ordered via
 * {@code LinkedHashMap}).
 *
 * @param aggregateFunctions aggregate functions to extract; may be empty
 * @param namedExpressions   grouping expressions to extract; may be empty
 * @return an {@code AggregationInfo} wrapping the extracted map and keys, or
 *         {@code Optional.empty()} when both input lists are empty
 */
private Optional<AggregationInfo> createAggregationInfo(List<AggregateFunction> aggregateFunctions, List<NamedExpression> namedExpressions) {
    // Presize to the number of grouping keys; avoids ArrayList growth.
    List<RowExpression> groupingKeys = new ArrayList<>(namedExpressions.size());
    Map<String, AggregationInfo.AggregateFunction> aggregationMap = new LinkedHashMap<>();
    for (NamedExpression namedExpression : namedExpressions) {
        groupingKeys.add(extractNamedExpression((Expression) namedExpression));
    }
    for (AggregateFunction aggregateFunction : aggregateFunctions) {
        extractAggregateFunction(aggregateFunction, aggregationMap);
    }
    // Direct emptiness check replaces the original mutable isEmpty flag:
    // the flag was cleared iff at least one of the two loops ran.
    if (aggregateFunctions.isEmpty() && namedExpressions.isEmpty()) {
        return Optional.empty();
    }
    return Optional.of(new AggregationInfo(aggregationMap, groupingKeys));
}
Also used : NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) AggregateFunction(org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) AggregationInfo(com.huawei.boostkit.omnidata.model.AggregationInfo) LinkedHashMap(java.util.LinkedHashMap)

Example 3 with NamedExpression

use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project boostkit-bigdata by kunpengcompute.

The method extractAggAndGroupExpression of the class DataIoAdapter.

/**
 * Converts the pushed-down aggregation descriptors into a single
 * {@link AggregationInfo}.
 *
 * <p>NOTE(review): the loop reassigns {@code resAggregationInfo} on every
 * iteration, so when {@code aggExecutionList} has more than one element only
 * the LAST {@code AggExeInfo} survives — presumably the list holds at most one
 * entry in practice; confirm against the producer of {@code aggExecutionList}.
 *
 * @param aggExecutionList pushed-down aggregate execution descriptors
 * @return aggregation info built from the last descriptor, or
 *         {@code Optional.empty()} when the list is empty (or when
 *         {@code createAggregationInfo} sees two empty input lists)
 */
private Optional<AggregationInfo> extractAggAndGroupExpression(List<AggExeInfo> aggExecutionList) {
    Optional<AggregationInfo> resAggregationInfo = Optional.empty();
    for (AggExeInfo aggExeInfo : aggExecutionList) {
        // Bridge Scala Seq -> java.util.List for both expression collections.
        List<AggregateFunction> aggregateExpressions = JavaConverters.seqAsJavaList(aggExeInfo.aggregateExpressions());
        List<NamedExpression> namedExpressions = JavaConverters.seqAsJavaList(aggExeInfo.groupingExpressions());
        resAggregationInfo = createAggregationInfo(aggregateExpressions, namedExpressions);
    }
    return resAggregationInfo;
}
Also used : AggExeInfo(org.apache.spark.sql.execution.ndp.AggExeInfo) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) AggregateFunction(org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction) AggregationInfo(com.huawei.boostkit.omnidata.model.AggregationInfo)

Example 4 with NamedExpression

use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project boostkit-bigdata by kunpengcompute.

The method getColumnOffsetByAggExeInfo of the class NdpUtils.

/**
 * Computes the absolute offset between a column's expression id (the numeric
 * suffix after '#' in Catalyst's attribute naming, e.g. {@code name#12}) and
 * that column's position in {@code dataSchema}.
 *
 * <p>Resolution order (each inner loop breaks after its first element, so only
 * the FIRST child of the FIRST aggregate function is inspected): first an
 * aggregate-function child expression, then — if grouping expressions exist —
 * the first grouping expression OVERRIDES the aggregate-derived values.
 * NOTE(review): this last-wins override and the single-element breaks look
 * intentional but are undocumented upstream; confirm before refactoring.
 *
 * @param dataSchema table schema used to map column names to positions
 * @param aggExeInfo pushed-down aggregate descriptors; may be null or empty
 * @return {@code Math.abs(exprId - schemaPosition)}; when the column name is
 *         not found in the schema, the schema size is used as its position
 */
public static int getColumnOffsetByAggExeInfo(StructType dataSchema, Seq<AggExeInfo> aggExeInfo) {
    String columnName = "";
    int columnTempId = 0;
    if (aggExeInfo != null && aggExeInfo.size() > 0) {
        List<AggExeInfo> aggExecutionList = JavaConverters.seqAsJavaList(aggExeInfo);
        for (AggExeInfo aggExeInfoTemp : aggExecutionList) {
            List<AggregateFunction> aggregateExpressions = JavaConverters.seqAsJavaList(aggExeInfoTemp.aggregateExpressions());
            for (AggregateFunction aggregateFunction : aggregateExpressions) {
                List<Expression> expressions = JavaConverters.seqAsJavaList(aggregateFunction.children());
                for (Expression expression : expressions) {
                    // Column name = text before '#', with '(' stripped (e.g. "max(col#3)" -> "col").
                    columnName = expression.toString().split("#")[0].replaceAll("\\(", "");
                    // NOTE(review): columnName is interpolated into the regex unquoted;
                    // a name containing regex metacharacters (e.g. ')') would corrupt
                    // the pattern — Pattern.quote(columnName) would be safer. Confirm
                    // column names are always plain identifiers here.
                    Pattern pattern = Pattern.compile(columnName + "#(\\d+)");
                    Matcher matcher = pattern.matcher(expression.toString());
                    if (matcher.find()) {
                        columnTempId = Integer.parseInt(matcher.group(1));
                        break;
                    }
                }
                // Only the first aggregate function is considered.
                break;
            }
            List<NamedExpression> namedExpressions = JavaConverters.seqAsJavaList(aggExeInfoTemp.groupingExpressions());
            for (NamedExpression namedExpression : namedExpressions) {
                // Grouping expression (if any) overrides the aggregate-derived column.
                columnName = namedExpression.toString().split("#")[0];
                columnTempId = NdpUtils.getColumnId(namedExpression.toString());
                break;
            }
        }
    }
    // Map each schema column name to its ordinal position.
    Map<String, Integer> columnMap = new HashMap<>();
    scala.collection.Iterator<StructField> allTableSchemas = dataSchema.iterator();
    int dataSchemaColumnNum = 0;
    while (allTableSchemas.hasNext()) {
        StructField structField = allTableSchemas.next();
        columnMap.put(structField.name(), dataSchemaColumnNum++);
    }
    // Fallback position for an unknown column is the schema size.
    int columnOffset = columnTempId - columnMap.getOrDefault(columnName, columnMap.size());
    return Math.abs(columnOffset);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) HashMap(java.util.HashMap) AggExeInfo(org.apache.spark.sql.execution.ndp.AggExeInfo) StructField(org.apache.spark.sql.types.StructField) ConstantExpression(io.prestosql.spi.relation.ConstantExpression) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) Expression(org.apache.spark.sql.catalyst.expressions.Expression) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) AggregateFunction(org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction)

Example 5 with NamedExpression

use of org.apache.spark.sql.catalyst.expressions.NamedExpression in project OpenLineage by OpenLineage.

The method testBuildUnknownFacet of the class UnknownEntryFacetListenerTest.

@Test
void testBuildUnknownFacet() {
    // Verifies that UnknownEntryFacetListener builds an UnknownEntryFacet from a
    // Project(AttributeReference) over a ListFilesCommand logical plan.
    UnknownEntryFacetListener underTest = new UnknownEntryFacetListener();
    // Attribute "test" of struct type `gender` STRING, non-nullable, carrying
    // varchar(64) metadata and exprId 1, with no qualifier parts.
    NamedExpression reference = new AttributeReference("test", DataType.fromDDL("`gender` STRING"), false, Metadata$.MODULE$.fromJson("{\"__CHAR_VARCHAR_TYPE_STRING\":\"varchar(64)\"}"), ExprId.apply(1L), Seq$.MODULE$.<String>newBuilder().result());
    // Leaf plan: LIST FILES over a single local path.
    ListFilesCommand logicalPlan = new ListFilesCommand(Seq$.MODULE$.<String>newBuilder().$plus$eq("./test.file").result());
    Project project = new Project(Seq$.MODULE$.<NamedExpression>newBuilder().$plus$eq(reference).result(), logicalPlan);
    UnknownEntryFacet facet = underTest.build(project).get();
    // Output node: input attribute comes from ListFilesCommand's output ("Results": string),
    // output attribute is the projected "test" reference with its metadata preserved.
    assertThat(facet.getOutput().getInputAttributes()).hasSize(1).first().hasFieldOrPropertyWithValue("name", "Results").hasFieldOrPropertyWithValue("type", "string");
    assertThat(facet.getOutput().getOutputAttributes()).hasSize(1).first().hasFieldOrPropertyWithValue("name", "test").hasFieldOrPropertyWithValue("type", "struct").extracting("metadata").asInstanceOf(InstanceOfAssertFactories.MAP).containsEntry("__CHAR_VARCHAR_TYPE_STRING", "varchar(64)");
    // Input node: the leaf command consumes nothing and emits the "Results" column.
    assertThat(facet.getInputs()).hasSize(1).first().extracting("inputAttributes").asList().hasSize(0);
    assertThat(facet.getInputs()).hasSize(1).first().extracting("outputAttributes").asList().hasSize(1).first().hasFieldOrPropertyWithValue("name", "Results").hasFieldOrPropertyWithValue("type", "string");
}
Also used : Project(org.apache.spark.sql.catalyst.plans.logical.Project) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) AttributeReference(org.apache.spark.sql.catalyst.expressions.AttributeReference) ListFilesCommand(org.apache.spark.sql.execution.command.ListFilesCommand) UnknownEntryFacet(io.openlineage.spark.agent.facets.UnknownEntryFacet) Test(org.junit.jupiter.api.Test)

Aggregations

NamedExpression (org.apache.spark.sql.catalyst.expressions.NamedExpression)6 AggregateFunction (org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction)3 AggregationInfo (com.huawei.boostkit.omnidata.model.AggregationInfo)2 UnknownEntryFacet (io.openlineage.spark.agent.facets.UnknownEntryFacet)2 AttributeReference (org.apache.spark.sql.catalyst.expressions.AttributeReference)2 Expression (org.apache.spark.sql.catalyst.expressions.Expression)2 Project (org.apache.spark.sql.catalyst.plans.logical.Project)2 ListFilesCommand (org.apache.spark.sql.execution.command.ListFilesCommand)2 AggExeInfo (org.apache.spark.sql.execution.ndp.AggExeInfo)2 Test (org.junit.jupiter.api.Test)2 ConstantExpression (io.prestosql.spi.relation.ConstantExpression)1 RowExpression (io.prestosql.spi.relation.RowExpression)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 UnresolvedAttribute (org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute)1 LogicalPlan (org.apache.spark.sql.catalyst.plans.logical.LogicalPlan)1 StructField (org.apache.spark.sql.types.StructField)1