Search in sources :

Example 16 with DualInputSemanticProperties

use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.

the class SemanticPropUtil method addSourceFieldOffsets.

/**
 * Builds a new {@link DualInputSemanticProperties} from the given one in which every source
 * (input-side) field index is shifted by a per-input offset. This applies to the read fields and
 * to the source side of every forwarded-field mapping; the target (output-side) indexes of
 * forwarded fields are copied over unchanged.
 *
 * @param props The SemanticProperties whose input field indexes are shifted.
 * @param numInputFields1 The original number of fields of the first input.
 * @param numInputFields2 The original number of fields of the second input.
 * @param offset1 The offset added to each input field index of the first input.
 * @param offset2 The offset added to each input field index of the second input.
 * @return A new SemanticProperties instance with the offsets applied.
 */
public static DualInputSemanticProperties addSourceFieldOffsets(DualInputSemanticProperties props, int numInputFields1, int numInputFields2, int offset1, int offset2) {
    final DualInputSemanticProperties shifted = new DualInputSemanticProperties();

    // Read fields of the first input; a null set means nothing was declared and is left unset.
    final FieldSet readFirst = props.getReadFields(0);
    if (readFirst != null) {
        // addField returns the resulting set, so the reference has to be re-assigned each time.
        FieldSet moved = new FieldSet();
        for (int field : readFirst) {
            moved = moved.addField(field + offset1);
        }
        shifted.addReadFields(0, moved);
    }

    // Read fields of the second input, handled the same way with the second offset.
    final FieldSet readSecond = props.getReadFields(1);
    if (readSecond != null) {
        FieldSet moved = new FieldSet();
        for (int field : readSecond) {
            moved = moved.addField(field + offset2);
        }
        shifted.addReadFields(1, moved);
    }

    // Forwarded fields of the first input: only the source index moves, targets stay as they are.
    for (int source = 0; source < numInputFields1; source++) {
        final int shiftedSource = source + offset1;
        for (int target : props.getForwardingTargetFields(0, source)) {
            shifted.addForwardedField(0, shiftedSource, target);
        }
    }

    // Forwarded fields of the second input.
    for (int source = 0; source < numInputFields2; source++) {
        final int shiftedSource = source + offset2;
        for (int target : props.getForwardingTargetFields(1, source)) {
            shifted.addForwardedField(1, shiftedSource, target);
        }
    }

    return shifted;
}
Also used : FieldSet(org.apache.flink.api.common.operators.util.FieldSet) DualInputSemanticProperties(org.apache.flink.api.common.operators.DualInputSemanticProperties)

Example 17 with DualInputSemanticProperties

use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.

the class SemanticPropUtil method createProjectionPropertiesDual.

/**
 * Creates the forwarded-field information of a projection that assembles its output from the
 * fields of two inputs.
 *
 * <p>The output is filled front to back: for every entry {@code i}, the selected input field
 * (or the complete input) is forwarded to the next free output positions, so the target offset
 * advances by the number of fields copied for that entry.
 *
 * @param fields For each projected entry, the index of the top-level field to copy, or
 *               {@code -1} to copy the complete input. An index other than {@code -1} requires
 *               the corresponding input type to be a {@link TupleTypeInfo}.
 * @param isFromFirst For each projected entry, {@code true} if it is taken from the first input
 *                    and {@code false} if it is taken from the second input.
 * @param inType1 The type information of the first input.
 * @param inType2 The type information of the second input.
 * @return The semantic properties holding one forwarded-field entry per copied field.
 */
public static DualInputSemanticProperties createProjectionPropertiesDual(int[] fields, boolean[] isFromFirst, TypeInformation<?> inType1, TypeInformation<?> inType2) {
    DualInputSemanticProperties dsp = new DualInputSemanticProperties();
    int[] sourceOffsets1 = computeTopLevelFieldOffsets(inType1);
    int[] sourceOffsets2 = computeTopLevelFieldOffsets(inType2);

    int targetOffset = 0;
    for (int i = 0; i < fields.length; i++) {
        // select everything that depends on which input this entry is taken from
        boolean fromFirst = isFromFirst[i];
        int input = fromFirst ? 0 : 1;
        TypeInformation<?> inType = fromFirst ? inType1 : inType2;
        int[] sourceOffsets = fromFirst ? sourceOffsets1 : sourceOffsets2;

        int sourceOffset;
        int numFieldsToCopy;
        if (fields[i] == -1) {
            // -1 selects the complete input, starting at its first field
            sourceOffset = 0;
            numFieldsToCopy = inType.getTotalFields();
        } else {
            sourceOffset = sourceOffsets[fields[i]];
            numFieldsToCopy = ((TupleTypeInfo<?>) inType).getTypeAt(fields[i]).getTotalFields();
        }

        for (int j = 0; j < numFieldsToCopy; j++) {
            dsp.addForwardedField(input, sourceOffset + j, targetOffset + j);
        }
        targetOffset += numFieldsToCopy;
    }
    return dsp;
}

/**
 * Computes, for each top-level field of the given type, the offset at which that field starts
 * when the total field counts of all preceding top-level fields are summed up.
 *
 * <p>Types that are not a {@link TupleTypeInfo} are treated as one field at offset 0. A tuple
 * type with arity 0 yields an empty array instead of failing on the write to index 0.
 */
private static int[] computeTopLevelFieldOffsets(TypeInformation<?> type) {
    if (!(type instanceof TupleTypeInfo<?>)) {
        return new int[] { 0 };
    }
    TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>) type;
    // A new int[] is zero-filled, so offsets[0] is already 0 and needs no explicit assignment
    // (which would throw for an arity of 0).
    int[] offsets = new int[tupleType.getArity()];
    for (int i = 1; i < offsets.length; i++) {
        offsets[i] = tupleType.getTypeAt(i - 1).getTotalFields() + offsets[i - 1];
    }
    return offsets;
}
Also used : DualInputSemanticProperties(org.apache.flink.api.common.operators.DualInputSemanticProperties) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo)

Example 18 with DualInputSemanticProperties

use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.

the class TwoInputUdfOperator method withForwardedFieldsFirst.

/**
 * Declares which fields of the <em>first</em> input the user-defined function forwards to its output.
 *
 * <p>
 * A forwarded field is a field the function never modifies. It either keeps its position in the output
 * or is copied unchanged to a different output position.
 * </p>
 *
 * <p>
 * A field that keeps its position is declared by that position alone. The position has to be valid
 * for both the input and the output data type, and the field types have to be the same.
 * For example, <code>withForwardedFieldsFirst("f2")</code> states that the third field of a Java input tuple
 * of the first input is copied to the third field of the output tuple.
 * </p>
 *
 * <p>
 * A field that is copied unchanged to another position is declared by the source field reference in the
 * first input followed by the target field reference in the output.
 * {@code withForwardedFieldsFirst("f0->f2")} states that the first field of the first input Java tuple is
 * copied unchanged to the third field of the Java output tuple. When a wildcard ("*") is used, make sure
 * that the number of declared fields and their types match between the first input and the output type.
 * </p>
 *
 * <p>
 * Several forwarded fields can be given in a single String ({@code withForwardedFieldsFirst("f2; f3->f0; f4")})
 * or as separate Strings ({@code withForwardedFieldsFirst("f2", "f3->f0", "f4")}).
 * See the JavaDoc of {@link org.apache.flink.api.common.functions.Function} or Flink's documentation for
 * details on field references such as nested fields and wildcards.
 * </p>
 *
 * <p>
 * Forwarded field information of the first input that already exists, for example from a
 * {@link org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst} class annotation,
 * cannot be overridden. In that case an {@code InvalidSemanticAnnotationException} is thrown.
 * </p>
 *
 * <p>
 * <b>NOTE: Adding semantic information for functions is optional!
 * Used correctly, it can help the Flink optimizer to generate more efficient execution plans.
 * Incorrect semantic information, however, can make the optimizer generate incorrect execution plans
 * which compute wrong results! So be careful when adding semantic information.
 * </b>
 * </p>
 *
 * @param forwardedFieldsFirst A list of forwarded field expressions for the first input of the function.
 * @return This operator with annotated forwarded field information.
 *
 * @see org.apache.flink.api.java.functions.FunctionAnnotation
 * @see org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst
 */
@SuppressWarnings("unchecked")
public O withForwardedFieldsFirst(String... forwardedFieldsFirst) {
    // No properties yet, or the current ones are flagged as analyzed:
    // take whatever the function's annotations declare.
    if (this.udfSemantics == null || this.analyzedUdfSemantics) {
        setSemanticProperties(extractSemanticAnnotationsFromUdf(getFunction().getClass()));
    }

    // Re-check, since the call above may have changed the state.
    final boolean startFromEmptyProperties = this.udfSemantics == null || this.analyzedUdfSemantics;
    if (startFromEmptyProperties) {
        setSemanticProperties(new DualInputSemanticProperties());
    } else if (this.udfWithForwardedFieldsFirstAnnotation(getFunction().getClass())) {
        // refuse semantic information as it would override the function annotation
        throw new SemanticProperties.InvalidSemanticAnnotationException(
                "Forwarded field information "
                        + "has already been added by a function annotation for the first input of this operator. "
                        + "Cannot overwrite function annotations.");
    }

    // Both remaining paths parse the expressions into the current properties.
    SemanticPropUtil.getSemanticPropsDualFromString(this.udfSemantics, forwardedFieldsFirst, null, null, null, null, null, getInput1Type(), getInput2Type(), getResultType());
    return (O) this;
}
Also used : DualInputSemanticProperties(org.apache.flink.api.common.operators.DualInputSemanticProperties)

Example 19 with DualInputSemanticProperties

use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.

the class TwoInputUdfOperator method withForwardedFieldsSecond.

/**
 * Declares which fields of the <em>second</em> input the user-defined function forwards to its output.
 *
 * <p>
 * A forwarded field is a field the function never modifies. It either keeps its position in the output
 * or is copied unchanged to a different output position.
 * </p>
 *
 * <p>
 * A field that keeps its position is declared by that position alone. The position has to be valid
 * for both the input and the output data type, and the field types have to be the same.
 * For example, <code>withForwardedFieldsSecond("f2")</code> states that the third field of a Java input tuple
 * of the second input is copied to the third field of the output tuple.
 * </p>
 *
 * <p>
 * A field that is copied unchanged to another position is declared by the source field reference in the
 * second input followed by the target field reference in the output.
 * {@code withForwardedFieldsSecond("f0->f2")} states that the first field of the second input Java tuple is
 * copied unchanged to the third field of the Java output tuple. When a wildcard ("*") is used, make sure
 * that the number of declared fields and their types match between the second input and the output type.
 * </p>
 *
 * <p>
 * Several forwarded fields can be given in a single String ({@code withForwardedFieldsSecond("f2; f3->f0; f4")})
 * or as separate Strings ({@code withForwardedFieldsSecond("f2", "f3->f0", "f4")}).
 * See the JavaDoc of {@link org.apache.flink.api.common.functions.Function} or Flink's documentation for
 * details on field references such as nested fields and wildcards.
 * </p>
 *
 * <p>
 * Forwarded field information of the second input that already exists, for example from a
 * {@link org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond} class annotation,
 * cannot be overridden. In that case an {@code InvalidSemanticAnnotationException} is thrown.
 * </p>
 *
 * <p>
 * <b>NOTE: Adding semantic information for functions is optional!
 * Used correctly, it can help the Flink optimizer to generate more efficient execution plans.
 * Incorrect semantic information, however, can make the optimizer generate incorrect execution plans
 * which compute wrong results! So be careful when adding semantic information.
 * </b>
 * </p>
 *
 * @param forwardedFieldsSecond A list of forwarded field expressions for the second input of the function.
 * @return This operator with annotated forwarded field information.
 *
 * @see org.apache.flink.api.java.functions.FunctionAnnotation
 * @see org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond
 */
@SuppressWarnings("unchecked")
public O withForwardedFieldsSecond(String... forwardedFieldsSecond) {
    // No properties yet, or the current ones are flagged as analyzed:
    // take whatever the function's annotations declare.
    if (this.udfSemantics == null || this.analyzedUdfSemantics) {
        setSemanticProperties(extractSemanticAnnotationsFromUdf(getFunction().getClass()));
    }

    // Re-check, since the call above may have changed the state.
    final boolean startFromEmptyProperties = this.udfSemantics == null || this.analyzedUdfSemantics;
    if (startFromEmptyProperties) {
        setSemanticProperties(new DualInputSemanticProperties());
    } else if (udfWithForwardedFieldsSecondAnnotation(getFunction().getClass())) {
        // refuse semantic information as it would override the function annotation
        throw new SemanticProperties.InvalidSemanticAnnotationException(
                "Forwarded field information "
                        + "has already been added by a function annotation for the second input of this operator. "
                        + "Cannot overwrite function annotations.");
    }

    // Both remaining paths parse the expressions into the current properties.
    SemanticPropUtil.getSemanticPropsDualFromString(this.udfSemantics, null, forwardedFieldsSecond, null, null, null, null, getInput1Type(), getInput2Type(), getResultType());
    return (O) this;
}
Also used : DualInputSemanticProperties(org.apache.flink.api.common.operators.DualInputSemanticProperties)

Example 20 with DualInputSemanticProperties

use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.

the class SemanticPropertiesProjectionTest method testJoinProjectionSemProps1.

/**
 * Self-joins a five-field tuple data set, projects fields 2 and 3 of the first input followed by
 * fields 1 and 4 of the second input, and checks that each projected field is forwarded to
 * exactly one output position, in projection order.
 */
@Test
public void testJoinProjectionSemProps1() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple5<Integer, Long, String, Long, Integer>> input = environment.fromCollection(emptyTupleData, tupleTypeInfo);

    // join the data set with itself on field 0 and project two fields from each side
    input.join(input)
            .where(0)
            .equalTo(0)
            .projectFirst(2, 3)
            .projectSecond(1, 4)
            .output(new DiscardingOutputFormat<Tuple>());

    // the operator feeding the single sink is the projecting join
    final Plan programPlan = environment.createProgramPlan();
    final GenericDataSinkBase<?> dataSink = programPlan.getDataSinks().iterator().next();
    final InnerJoinOperatorBase<?, ?, ?, ?> joinOperator = (InnerJoinOperatorBase<?, ?, ?, ?>) dataSink.getInput();
    final DualInputSemanticProperties semProps = joinOperator.getSemanticProperties();

    // every projected source field has exactly one target
    assertEquals(1, semProps.getForwardingTargetFields(0, 2).size());
    assertEquals(1, semProps.getForwardingTargetFields(0, 3).size());
    assertEquals(1, semProps.getForwardingTargetFields(1, 1).size());
    assertEquals(1, semProps.getForwardingTargetFields(1, 4).size());

    // targets are assigned in projection order: first input -> 0, 1; second input -> 2, 3
    assertTrue(semProps.getForwardingTargetFields(0, 2).contains(0));
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(1, 1).contains(2));
    assertTrue(semProps.getForwardingTargetFields(1, 4).contains(3));
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) DualInputSemanticProperties(org.apache.flink.api.common.operators.DualInputSemanticProperties) Test(org.junit.Test)

Aggregations

DualInputSemanticProperties (org.apache.flink.api.common.operators.DualInputSemanticProperties)36 Test (org.junit.Test)24 Plan (org.apache.flink.api.common.Plan)11 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)11 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)8 InnerJoinOperatorBase (org.apache.flink.api.common.operators.base.InnerJoinOperatorBase)7 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)6 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)6 InvalidSemanticAnnotationException (org.apache.flink.api.common.operators.SemanticProperties.InvalidSemanticAnnotationException)4 SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties)4 Matcher (java.util.regex.Matcher)3 FlatFieldDescriptor (org.apache.flink.api.common.typeutils.CompositeType.FlatFieldDescriptor)3 InvalidFieldReferenceException (org.apache.flink.api.common.typeutils.CompositeType.InvalidFieldReferenceException)3 Annotation (java.lang.annotation.Annotation)2 SemanticProperties (org.apache.flink.api.common.operators.SemanticProperties)2 DualInputOperator (org.apache.flink.api.common.operators.DualInputOperator)1 SelectorFunctionKeys (org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys)1 FunctionAnnotation (org.apache.flink.api.java.functions.FunctionAnnotation)1 ForwardedFields (org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields)1 ForwardedFieldsFirst (org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst)1