use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.
the class SemanticPropUtil method addSourceFieldOffsets.
/**
 * Builds new dual-input semantic properties from the given ones by shifting every source
 * (input-side) field index by a per-input offset.
 *
 * <p>For each input, the read fields are shifted only if the given properties report a non-null
 * read set for that input. Forwarded-field declarations are copied for the source indices
 * {@code 0} (inclusive) to {@code numInputFieldsN} (exclusive) of each input; the offset is applied
 * to the source index only, target indices are taken over unchanged.
 *
 * @param props The SemanticProperties whose source field indices are shifted.
 * @param numInputFields1 The original number of fields of the first input.
 * @param numInputFields2 The original number of fields of the second input.
 * @param offset1 The offset added to each source field index of the first input.
 * @param offset2 The offset added to each source field index of the second input.
 * @return New SemanticProperties with the offsets applied.
 */
public static DualInputSemanticProperties addSourceFieldOffsets(DualInputSemanticProperties props, int numInputFields1, int numInputFields2, int offset1, int offset2) {
    // index 0 = first input, index 1 = second input
    final int[] offsets = {offset1, offset2};
    final int[] fieldCounts = {numInputFields1, numInputFields2};

    final DualInputSemanticProperties shifted = new DualInputSemanticProperties();

    // shift the read fields of the first input, then of the second input
    for (int input = 0; input < 2; input++) {
        final FieldSet originalReads = props.getReadFields(input);
        if (originalReads == null) {
            // no read set for this input: nothing is registered on the result
            continue;
        }
        FieldSet shiftedReads = new FieldSet();
        for (int readField : originalReads) {
            shiftedReads = shiftedReads.addField(readField + offsets[input]);
        }
        shifted.addReadFields(input, shiftedReads);
    }

    // shift the forwarded source fields of the first input, then of the second input
    for (int input = 0; input < 2; input++) {
        for (int source = 0; source < fieldCounts[input]; source++) {
            for (int target : props.getForwardingTargetFields(input, source)) {
                shifted.addForwardedField(input, source + offsets[input], target);
            }
        }
    }

    return shifted;
}
use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.
the class SemanticPropUtil method createProjectionPropertiesDual.
/**
 * Creates the semantic properties of a projection over two inputs.
 *
 * <p>The projected entries are laid out one after another in the output, starting at flat
 * position 0. Every flat field covered by an entry is declared as forwarded from its position
 * in the source input to its position in the output.
 *
 * @param fields For each projected entry, the index of the tuple field to project, or
 *     {@code -1} to project the complete record of the respective input.
 * @param isFromFirst For each projected entry, {@code true} if it is taken from the first
 *     input and {@code false} if it is taken from the second input.
 * @param inType1 The type of the first input.
 * @param inType2 The type of the second input.
 * @return The semantic properties describing the forwarded fields of the projection.
 */
public static DualInputSemanticProperties createProjectionPropertiesDual(int[] fields, boolean[] isFromFirst, TypeInformation<?> inType1, TypeInformation<?> inType2) {
    final DualInputSemanticProperties dsp = new DualInputSemanticProperties();

    final int[] sourceOffsets1 = computeFlatFieldOffsets(inType1);
    final int[] sourceOffsets2 = computeFlatFieldOffsets(inType2);

    int targetOffset = 0;
    for (int i = 0; i < fields.length; i++) {
        final boolean fromFirst = isFromFirst[i];
        final int input = fromFirst ? 0 : 1;
        final TypeInformation<?> inType = fromFirst ? inType1 : inType2;
        final int[] sourceOffsets = fromFirst ? sourceOffsets1 : sourceOffsets2;

        final int sourceOffset;
        final int numFieldsToCopy;
        if (fields[i] == -1) {
            // -1 selects the complete input record
            sourceOffset = 0;
            numFieldsToCopy = inType.getTotalFields();
        } else {
            sourceOffset = sourceOffsets[fields[i]];
            numFieldsToCopy = ((TupleTypeInfo<?>) inType).getTypeAt(fields[i]).getTotalFields();
        }

        for (int j = 0; j < numFieldsToCopy; j++) {
            dsp.addForwardedField(input, sourceOffset + j, targetOffset + j);
        }
        targetOffset += numFieldsToCopy;
    }
    return dsp;
}

/**
 * Computes, for each top-level field of a tuple type, the flat position at which that field
 * starts, i.e. the sum of {@code getTotalFields()} over all preceding top-level fields.
 *
 * <p>For any type that is not a {@code TupleTypeInfo} the result is the single offset
 * {@code {0}}. For a tuple type of arity 0 the result is an empty array.
 *
 * @param inType The input type to compute the offsets for.
 * @return The flat start offset of every top-level field.
 */
private static int[] computeFlatFieldOffsets(TypeInformation<?> inType) {
    if (!(inType instanceof TupleTypeInfo<?>)) {
        return new int[] { 0 };
    }
    final TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>) inType;
    // array elements are zero-initialized, so the offset of the first field is already 0
    final int[] offsets = new int[tupleType.getArity()];
    for (int i = 1; i < offsets.length; i++) {
        offsets[i] = offsets[i - 1] + tupleType.getTypeAt(i - 1).getTotalFields();
    }
    return offsets;
}
use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.
the class TwoInputUdfOperator method withForwardedFieldsFirst.
/**
 * Adds semantic information about forwarded fields of the first input of the user-defined function.
 *
 * <p>Forwarded fields are fields which the function never modifies and which are either emitted
 * at the same position in the output or copied unchanged to another output position.
 *
 * <p>A field that keeps its position is declared by that position alone. The position must be
 * valid for both the input and the output data type, and the types must be the same.
 * For example, {@code withForwardedFieldsFirst("f2")} declares that the third field of a Java
 * input tuple from the first input is copied to the third field of an output tuple.
 *
 * <p>A field that is copied unchanged to a different position is declared by its source field
 * reference in the first input and its target field reference in the output.
 * {@code withForwardedFieldsFirst("f0->f2")} denotes that the first field of the first input Java
 * tuple is copied unchanged to the third field of the Java output tuple. When using a wildcard
 * ("*"), ensure that the number of declared fields and their types match between the first input
 * and the output type.
 *
 * <p>Several forwarded fields can be declared in a single String
 * ({@code withForwardedFieldsFirst("f2; f3->f0; f4")}) or in separate Strings
 * ({@code withForwardedFieldsFirst("f2", "f3->f0", "f4")}).
 * Please refer to the JavaDoc of {@link org.apache.flink.api.common.functions.Function} or Flink's
 * documentation for details on field references such as nested fields and wildcard.
 *
 * <p>Existing forwarded-field information for the first input cannot be overridden, for example
 * information that was added by a
 * {@link org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst} class annotation.
 *
 * <p><b>NOTE: Adding semantic information for functions is optional!
 * If used correctly, semantic information can help the Flink optimizer to generate more efficient
 * execution plans. However, incorrect semantic information can cause the optimizer to generate
 * incorrect execution plans which compute wrong results! So be careful when adding semantic
 * information.</b>
 *
 * @param forwardedFieldsFirst A list of forwarded field expressions for the first input of the function.
 * @return This operator with annotated forwarded field information.
 *
 * @see org.apache.flink.api.java.functions.FunctionAnnotation
 * @see org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst
 */
@SuppressWarnings("unchecked")
public O withForwardedFieldsFirst(String... forwardedFieldsFirst) {
    // without usable semantics so far, try the annotations on the function class first
    if (this.udfSemantics == null || this.analyzedUdfSemantics) {
        setSemanticProperties(extractSemanticAnnotationsFromUdf(getFunction().getClass()));
    }

    // evaluated again on purpose: the call above may have installed properties
    if (this.udfSemantics == null || this.analyzedUdfSemantics) {
        // still nothing to extend, start from empty properties
        setSemanticProperties(new DualInputSemanticProperties());
    } else if (this.udfWithForwardedFieldsFirstAnnotation(getFunction().getClass())) {
        // refuse semantic information as it would override the function annotation
        throw new SemanticProperties.InvalidSemanticAnnotationException(
                "Forwarded field information "
                        + "has already been added by a function annotation for the first input of this operator. "
                        + "Cannot overwrite function annotations.");
    }

    // add the forwarded fields of the first input to the current semantic properties
    SemanticPropUtil.getSemanticPropsDualFromString(this.udfSemantics, forwardedFieldsFirst, null, null, null, null, null, getInput1Type(), getInput2Type(), getResultType());

    return (O) this;
}
use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.
the class TwoInputUdfOperator method withForwardedFieldsSecond.
/**
 * Adds semantic information about forwarded fields of the second input of the user-defined function.
 *
 * <p>Forwarded fields are fields which the function never modifies and which are either emitted
 * at the same position in the output or copied unchanged to another output position.
 *
 * <p>A field that keeps its position is declared by that position alone. The position must be
 * valid for both the input and the output data type, and the types must be the same.
 * For example, {@code withForwardedFieldsSecond("f2")} declares that the third field of a Java
 * input tuple from the second input is copied to the third field of an output tuple.
 *
 * <p>A field that is copied unchanged to a different position is declared by its source field
 * reference in the second input and its target field reference in the output.
 * {@code withForwardedFieldsSecond("f0->f2")} denotes that the first field of the second input Java
 * tuple is copied unchanged to the third field of the Java output tuple. When using a wildcard
 * ("*"), ensure that the number of declared fields and their types match between the second input
 * and the output type.
 *
 * <p>Several forwarded fields can be declared in a single String
 * ({@code withForwardedFieldsSecond("f2; f3->f0; f4")}) or in separate Strings
 * ({@code withForwardedFieldsSecond("f2", "f3->f0", "f4")}).
 * Please refer to the JavaDoc of {@link org.apache.flink.api.common.functions.Function} or Flink's
 * documentation for details on field references such as nested fields and wildcard.
 *
 * <p>Existing forwarded-field information for the second input cannot be overridden, for example
 * information that was added by a
 * {@link org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond} class annotation.
 *
 * <p><b>NOTE: Adding semantic information for functions is optional!
 * If used correctly, semantic information can help the Flink optimizer to generate more efficient
 * execution plans. However, incorrect semantic information can cause the optimizer to generate
 * incorrect execution plans which compute wrong results! So be careful when adding semantic
 * information.</b>
 *
 * @param forwardedFieldsSecond A list of forwarded field expressions for the second input of the function.
 * @return This operator with annotated forwarded field information.
 *
 * @see org.apache.flink.api.java.functions.FunctionAnnotation
 * @see org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond
 */
@SuppressWarnings("unchecked")
public O withForwardedFieldsSecond(String... forwardedFieldsSecond) {
    // without usable semantics so far, try the annotations on the function class first
    if (this.udfSemantics == null || this.analyzedUdfSemantics) {
        setSemanticProperties(extractSemanticAnnotationsFromUdf(getFunction().getClass()));
    }

    // evaluated again on purpose: the call above may have installed properties
    if (this.udfSemantics == null || this.analyzedUdfSemantics) {
        // still nothing to extend, start from empty properties
        setSemanticProperties(new DualInputSemanticProperties());
    } else if (udfWithForwardedFieldsSecondAnnotation(getFunction().getClass())) {
        // refuse semantic information as it would override the function annotation
        throw new SemanticProperties.InvalidSemanticAnnotationException(
                "Forwarded field information "
                        + "has already been added by a function annotation for the second input of this operator. "
                        + "Cannot overwrite function annotations.");
    }

    // add the forwarded fields of the second input to the current semantic properties
    SemanticPropUtil.getSemanticPropsDualFromString(this.udfSemantics, null, forwardedFieldsSecond, null, null, null, null, getInput1Type(), getInput2Type(), getResultType());

    return (O) this;
}
use of org.apache.flink.api.common.operators.DualInputSemanticProperties in project flink by apache.
the class SemanticPropertiesProjectionTest method testJoinProjectionSemProps1.
/**
 * Self-join on field 0 that projects fields 2 and 3 of the first input followed by fields 1 and 4
 * of the second input. Checks that each projected source field is forwarded to exactly one
 * output position, in projection order.
 */
@Test
public void testJoinProjectionSemProps1() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> input = env.fromCollection(emptyTupleData, tupleTypeInfo);

    input.join(input)
            .where(0)
            .equalTo(0)
            .projectFirst(2, 3)
            .projectSecond(1, 4)
            .output(new DiscardingOutputFormat<Tuple>());

    // the projecting join is the direct input of the only sink of the plan
    Plan programPlan = env.createProgramPlan();
    GenericDataSinkBase<?> dataSink = programPlan.getDataSinks().iterator().next();
    InnerJoinOperatorBase<?, ?, ?, ?> joinOp = (InnerJoinOperatorBase<?, ?, ?, ?>) dataSink.getInput();

    DualInputSemanticProperties semProps = joinOp.getSemanticProperties();

    // every projected source field has exactly one target
    assertEquals(1, semProps.getForwardingTargetFields(0, 2).size());
    assertEquals(1, semProps.getForwardingTargetFields(0, 3).size());
    assertEquals(1, semProps.getForwardingTargetFields(1, 1).size());
    assertEquals(1, semProps.getForwardingTargetFields(1, 4).size());

    // targets follow the projection order: first-input fields, then second-input fields
    assertTrue(semProps.getForwardingTargetFields(0, 2).contains(0));
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(1, 1).contains(2));
    assertTrue(semProps.getForwardingTargetFields(1, 4).contains(3));
}
Aggregations