Use of org.apache.flink.api.common.InvalidProgramException in project flink by apache.
In the class PartitionITCase, method testRangePartitionInIteration.
@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {
    // does not apply for collection execution
    if (super.mode == TestExecutionMode.COLLECTION) {
        throw new InvalidProgramException("Does not apply for collection execution");
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> source = env.generateSequence(0, 10000);

    DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() {
        @Override
        public Tuple2<Long, String> map(Long v) throws Exception {
            return new Tuple2<>(v, Long.toString(v));
        }
    });

    DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0);

    // Verify that range partition is not allowed in iteration
    DataSet<Tuple2<Long, String>> body = it.getWorkset()
            .partitionByRange(1)
            .join(it.getSolutionSet())
            .where(0).equalTo(0)
            .projectFirst(0).projectSecond(1);

    DataSet<Tuple2<Long, String>> result = it.closeWith(body, body);

    // should fail
    result.collect();
}
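By contrast, range partitioning outside of an iteration is allowed. A minimal sketch of the permitted case, assuming the same DataSet API classes as the test above plus org.apache.flink.api.java.io.DiscardingOutputFormat:

final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, String>> tuples = env.generateSequence(0, 10000)
        .map(new MapFunction<Long, Tuple2<Long, String>>() {
            @Override
            public Tuple2<Long, String> map(Long v) {
                return new Tuple2<>(v, Long.toString(v));
            }
        });

// partitionByRange on a plain DataSet is valid; only inside an iteration is it rejected
DataSet<Tuple2<Long, String>> partitioned = tuples.partitionByRange(1);
partitioned.output(new DiscardingOutputFormat<Tuple2<Long, String>>());
env.execute();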
Use of org.apache.flink.api.common.InvalidProgramException in project flink by apache.
In the class CoGroupITCase, method testCoGroupWithMultipleKeyFieldsWithInnerClassKeyExtractorWithoutClosureCleaner.
@Test
public void testCoGroupWithMultipleKeyFieldsWithInnerClassKeyExtractorWithoutClosureCleaner() throws Exception {
    /*
     * CoGroup with multiple key fields, test that disabling closure cleaner leads to an exception when using inner
     * classes.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableClosureCleaner();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);

    boolean correctExceptionTriggered = false;
    try {
        DataSet<Tuple3<Integer, Long, String>> coGrouped = ds1.coGroup(ds2)
                .where(new KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>>() {
                    @Override
                    public Tuple2<Integer, Long> getKey(Tuple5<Integer, Long, Integer, String, Long> t) throws Exception {
                        return new Tuple2<Integer, Long>(t.f0, t.f4);
                    }
                })
                .equalTo(new KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {
                    @Override
                    public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> t) {
                        return new Tuple2<Integer, Long>(t.f0, t.f1);
                    }
                })
                .with(new CoGroupFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
                    @Override
                    public void coGroup(Iterable<Tuple5<Integer, Long, Integer, String, Long>> first, Iterable<Tuple3<Integer, Long, String>> second, Collector<Tuple3<Integer, Long, String>> out) {
                        List<String> strs = new ArrayList<String>();
                        for (Tuple5<Integer, Long, Integer, String, Long> t : first) {
                            strs.add(t.f3);
                        }
                        for (Tuple3<Integer, Long, String> t : second) {
                            for (String s : strs) {
                                out.collect(new Tuple3<Integer, Long, String>(t.f0, t.f1, s));
                            }
                        }
                    }
                });
    } catch (InvalidProgramException ex) {
        correctExceptionTriggered = (ex.getCause() instanceof java.io.NotSerializableException);
    }
    Assert.assertTrue(correctExceptionTriggered);
}
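A hedged sketch of how the exception can be avoided while the closure cleaner stays disabled: use key extractors that do not capture the enclosing test instance, for example static nested classes. The class names below are illustrative and not part of the Flink test:

// A static nested KeySelector holds no reference to an outer instance,
// so it serializes cleanly even with the closure cleaner disabled.
public static class KeyOfTuple5 implements KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>> {
    @Override
    public Tuple2<Integer, Long> getKey(Tuple5<Integer, Long, Integer, String, Long> t) {
        return new Tuple2<Integer, Long>(t.f0, t.f4);
    }
}

public static class KeyOfTuple3 implements KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>> {
    @Override
    public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> t) {
        return new Tuple2<Integer, Long>(t.f0, t.f1);
    }
}

// usage: ds1.coGroup(ds2).where(new KeyOfTuple5()).equalTo(new KeyOfTuple3()).with(...)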
Use of org.apache.flink.api.common.InvalidProgramException in project flink by apache.
In the class StreamExecutionEnvironment, method readFile.
/**
* Reads the contents of the user-specified {@code filePath} based on the given {@link
* FileInputFormat}. Depending on the provided {@link FileProcessingMode}, the source either
* monitors the path and reacts to new data, or processes the data currently in the path once
* and exits.
*
* <p>See {@link #readFile(FileInputFormat, String, FileProcessingMode, long)}
*
* @param inputFormat The input format used to create the data stream
* @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or
* "hdfs://host:port/file/path")
* @param watchType The mode in which the source should operate, i.e. monitor path and react to
* new data, or process once and exit
* @param interval In the case of periodic path monitoring, this specifies the interval (in
* millis) between consecutive path scans
* @param filter The files to be excluded from the processing
* @param <OUT> The type of the returned data stream
* @return The data stream that represents the data read from the given file
* @deprecated Use {@link FileInputFormat#setFilesFilter(FilePathFilter)} to set a filter and
* {@link StreamExecutionEnvironment#readFile(FileInputFormat, String, FileProcessingMode,
* long)}
*/
@PublicEvolving
@Deprecated
public <OUT> DataStreamSource<OUT> readFile(
        FileInputFormat<OUT> inputFormat,
        String filePath,
        FileProcessingMode watchType,
        long interval,
        FilePathFilter filter) {

    inputFormat.setFilesFilter(filter);

    TypeInformation<OUT> typeInformation;
    try {
        typeInformation = TypeExtractor.getInputFormatTypes(inputFormat);
    } catch (Exception e) {
        throw new InvalidProgramException(
                "The type returned by the input format could not be automatically determined. "
                        + "Please specify the TypeInformation of the produced type explicitly by using the "
                        + "'createInput(InputFormat, TypeInformation)' method instead.");
    }
    return readFile(inputFormat, filePath, watchType, interval, typeInformation);
}
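As the @deprecated tag suggests, the same effect is obtained by configuring the filter on the input format and calling the non-deprecated four-argument overload. A minimal usage sketch; streamEnv, the path, and the 10-second interval are illustrative placeholders:

// Equivalent, non-deprecated usage: set the filter on the format itself,
// then call readFile(FileInputFormat, String, FileProcessingMode, long).
TextInputFormat format = new TextInputFormat(new Path("file:///some/local/dir"));
format.setFilesFilter(FilePathFilter.createDefaultFilter());

DataStream<String> lines = streamEnv.readFile(
        format,
        "file:///some/local/dir",
        FileProcessingMode.PROCESS_CONTINUOUSLY,
        10000L); // re-scan the path every 10 seconds for new files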
Use of org.apache.flink.api.common.InvalidProgramException in project flink by apache.
In the class CollectionExecutor, method executeBinaryOperator.
private <IN1, IN2, OUT> List<OUT> executeBinaryOperator(DualInputOperator<?, ?, ?, ?> operator, int superStep) throws Exception {
    Operator<?> inputOp1 = operator.getFirstInput();
    Operator<?> inputOp2 = operator.getSecondInput();

    if (inputOp1 == null) {
        throw new InvalidProgramException("The binary operation " + operator.getName() + " has no first input.");
    }
    if (inputOp2 == null) {
        throw new InvalidProgramException("The binary operation " + operator.getName() + " has no second input.");
    }

    // compute inputs
    @SuppressWarnings("unchecked")
    List<IN1> inputData1 = (List<IN1>) execute(inputOp1, superStep);
    @SuppressWarnings("unchecked")
    List<IN2> inputData2 = (List<IN2>) execute(inputOp2, superStep);

    @SuppressWarnings("unchecked")
    DualInputOperator<IN1, IN2, OUT, ?> typedOp = (DualInputOperator<IN1, IN2, OUT, ?>) operator;

    // build the runtime context and compute broadcast variables, if necessary
    TaskInfo taskInfo = new TaskInfo(typedOp.getName(), 1, 0, 1, 0);
    RuntimeUDFContext ctx;
    MetricGroup metrics = new UnregisteredMetricsGroup();

    if (RichFunction.class.isAssignableFrom(typedOp.getUserCodeWrapper().getUserCodeClass())) {
        ctx = superStep == 0
                ? new RuntimeUDFContext(taskInfo, classLoader, executionConfig, cachedFiles, accumulators, metrics)
                : new IterationRuntimeUDFContext(taskInfo, classLoader, executionConfig, cachedFiles, accumulators, metrics);

        for (Map.Entry<String, Operator<?>> bcInputs : operator.getBroadcastInputs().entrySet()) {
            List<?> bcData = execute(bcInputs.getValue());
            ctx.setBroadcastVariable(bcInputs.getKey(), bcData);
        }
    } else {
        ctx = null;
    }

    return typedOp.executeOnCollections(inputData1, inputData2, ctx, executionConfig);
}
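For context, this executor backs Flink's local collection execution. A hypothetical sketch of a program whose join (a dual-input operator) would be evaluated by executeBinaryOperator, assuming org.apache.flink.api.java.CollectionEnvironment and the usual Tuple imports:

// With CollectionEnvironment the whole program runs on Java Lists in the client JVM.
ExecutionEnvironment env = new CollectionEnvironment();

DataSet<Tuple2<Integer, String>> left = env.fromElements(
        new Tuple2<Integer, String>(1, "a"), new Tuple2<Integer, String>(2, "b"));
DataSet<Tuple2<Integer, Long>> right = env.fromElements(
        new Tuple2<Integer, Long>(1, 10L), new Tuple2<Integer, Long>(2, 20L));

// join is a binary (dual-input) operation, so collection execution
// routes it through executeBinaryOperator
DataSet<Tuple2<String, Long>> joined = left.join(right)
        .where(0).equalTo(0)
        .projectFirst(1).projectSecond(1);

List<Tuple2<String, Long>> result = joined.collect();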
Use of org.apache.flink.api.common.InvalidProgramException in project flink by apache.
In the class CollectionExecutor, method executeDeltaIteration.
@SuppressWarnings("unchecked")
private <T> List<T> executeDeltaIteration(DeltaIterationBase<?, ?> iteration) throws Exception {
    Operator<?> solutionInput = iteration.getInitialSolutionSet();
    Operator<?> worksetInput = iteration.getInitialWorkset();
    if (solutionInput == null) {
        throw new InvalidProgramException("The delta iteration " + iteration.getName() + " has no initial solution set.");
    }
    if (worksetInput == null) {
        throw new InvalidProgramException("The delta iteration " + iteration.getName() + " has no initial workset.");
    }
    if (iteration.getSolutionSetDelta() == null) {
        throw new InvalidProgramException("The iteration " + iteration.getName() + " has no solution set delta defined (is not closed).");
    }
    if (iteration.getNextWorkset() == null) {
        throw new InvalidProgramException("The iteration " + iteration.getName() + " has no workset defined (is not closed).");
    }

    List<T> solutionInputData = (List<T>) execute(solutionInput);
    List<T> worksetInputData = (List<T>) execute(worksetInput);

    // get the operators that are iterative
    Set<Operator<?>> dynamics = new LinkedHashSet<Operator<?>>();
    DynamicPathCollector dynCollector = new DynamicPathCollector(dynamics);
    iteration.getSolutionSetDelta().accept(dynCollector);
    iteration.getNextWorkset().accept(dynCollector);

    BinaryOperatorInformation<?, ?, ?> operatorInfo = iteration.getOperatorInfo();
    TypeInformation<?> solutionType = operatorInfo.getFirstInputType();

    int[] keyColumns = iteration.getSolutionSetKeyFields();
    boolean[] inputOrderings = new boolean[keyColumns.length];
    TypeComparator<T> inputComparator = ((CompositeType<T>) solutionType).createComparator(keyColumns, inputOrderings, 0, executionConfig);

    Map<TypeComparable<T>, T> solutionMap = new HashMap<TypeComparable<T>, T>(solutionInputData.size());
    // fill the solution from the initial input
    for (T delta : solutionInputData) {
        TypeComparable<T> wrapper = new TypeComparable<T>(delta, inputComparator);
        solutionMap.put(wrapper, delta);
    }

    List<?> currentWorkset = worksetInputData;

    // register the aggregators
    for (AggregatorWithName<?> a : iteration.getAggregators().getAllRegisteredAggregators()) {
        aggregators.put(a.getName(), a.getAggregator());
    }

    String convCriterionAggName = iteration.getAggregators().getConvergenceCriterionAggregatorName();
    ConvergenceCriterion<Value> convCriterion = (ConvergenceCriterion<Value>) iteration.getAggregators().getConvergenceCriterion();

    final int maxIterations = iteration.getMaximumNumberOfIterations();

    for (int superstep = 1; superstep <= maxIterations; superstep++) {

        List<T> currentSolution = new ArrayList<T>(solutionMap.size());
        currentSolution.addAll(solutionMap.values());

        // set the input to the current partial solution
        this.intermediateResults.put(iteration.getSolutionSet(), currentSolution);
        this.intermediateResults.put(iteration.getWorkset(), currentWorkset);

        // set the superstep number
        iterationSuperstep = superstep;

        // grab the current iteration result
        List<T> solutionSetDelta = (List<T>) execute(iteration.getSolutionSetDelta(), superstep);
        this.intermediateResults.put(iteration.getSolutionSetDelta(), solutionSetDelta);

        // update the solution
        for (T delta : solutionSetDelta) {
            TypeComparable<T> wrapper = new TypeComparable<T>(delta, inputComparator);
            solutionMap.put(wrapper, delta);
        }

        currentWorkset = execute(iteration.getNextWorkset(), superstep);

        if (currentWorkset.isEmpty()) {
            break;
        }

        // evaluate the aggregator convergence criterion
        if (convCriterion != null && convCriterionAggName != null) {
            Value v = aggregators.get(convCriterionAggName).getAggregate();
            if (convCriterion.isConverged(superstep, v)) {
                break;
            }
        }

        // clear the dynamic results
        for (Operator<?> o : dynamics) {
            intermediateResults.remove(o);
        }

        // set the previous iteration's aggregates and reset the aggregators
        for (Map.Entry<String, Aggregator<?>> e : aggregators.entrySet()) {
            previousAggregates.put(e.getKey(), e.getValue().getAggregate());
            e.getValue().reset();
        }
    }

    previousAggregates.clear();
    aggregators.clear();

    List<T> currentSolution = new ArrayList<T>(solutionMap.size());
    currentSolution.addAll(solutionMap.values());
    return currentSolution;
}
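For reference, a user-level delta iteration that this superstep loop would drive when run with collection execution might look like the following sketch. The data and field positions are illustrative; the body simply re-emits the workset, so the iteration runs until the maximum of 10 supersteps:

final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> initial = env.fromElements(
        new Tuple2<Long, Long>(1L, 0L), new Tuple2<Long, Long>(2L, 0L));

// key field 0 is the solution set key used to build the solution map above
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = initial.iterateDelta(initial, 10, 0);

DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
        .join(iteration.getSolutionSet())
        .where(0).equalTo(0)
        .projectFirst(0, 1);

// closeWith(solutionSetDelta, nextWorkset) corresponds to getSolutionSetDelta()/getNextWorkset()
DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, delta);
result.collect();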