use of org.apache.flink.api.java.DataSet in project flink by apache.
the class PythonPlanBinder method createUnionOperation.
/**
 * Builds a union of two previously registered data sets and stores the result.
 *
 * <p>The inputs are looked up in {@code sets} under {@code info.parentID} and
 * {@code info.otherID}; the resulting operator is named {@code "Union"} and
 * registered under {@code info.setID}.
 *
 * @param info operation descriptor supplying the input ids, the target id and
 *             the value passed to {@code getParallelism}
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
@SuppressWarnings("unchecked")
private void createUnionOperation(PythonOperationInfo info) throws IOException {
    DataSet first = (DataSet) sets.get(info.parentID);
    DataSet second = (DataSet) sets.get(info.otherID);

    DataSet unioned = first.union(second)
            .setParallelism(getParallelism(info))
            .name("Union");

    sets.put(info.setID, unioned);
}
use of org.apache.flink.api.java.DataSet in project flink by apache.
the class PythonPlanBinder method createDistinctOperation.
/**
 * Applies a distinct on the parent data set followed by a {@code KeyDiscarder} map
 * and stores the result.
 *
 * <p>The parent is looked up in {@code sets} under {@code info.parentID}. The
 * distinct step uses {@code info.keys} and is named {@code "Distinct"}; the
 * subsequent map step is named {@code "DistinctPostStep"}. Both steps receive the
 * parallelism returned by {@code getParallelism(info)}. The final operator is
 * registered under {@code info.setID}.
 *
 * @param info operation descriptor supplying the parent id, keys and target id
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
@SuppressWarnings("unchecked")
private void createDistinctOperation(PythonOperationInfo info) throws IOException {
    DataSet parent = (DataSet) sets.get(info.parentID);

    // Step 1: the distinct itself.
    DataSet deduplicated = parent.distinct(info.keys)
            .setParallelism(getParallelism(info))
            .name("Distinct");

    // Step 2: post-processing map (presumably strips the key again -- see KeyDiscarder).
    DataSet postProcessed = deduplicated.map(new KeyDiscarder())
            .setParallelism(getParallelism(info))
            .name("DistinctPostStep");

    sets.put(info.setID, postProcessed);
}
use of org.apache.flink.api.java.DataSet in project flink by apache.
the class PythonPlanBinder method createCsvSink.
/**
 * Attaches a CSV sink to the parent data set.
 *
 * <p>The parent is looked up in {@code sets} under {@code info.parentID} and first
 * mapped through a {@code StringTupleDeserializerMap} (named {@code "CsvSinkPreStep"}).
 * The mapped data is then written via {@code writeAsCsv} using {@code info.path},
 * {@code info.lineDelimiter}, {@code info.fieldDelimiter} and {@code info.writeMode};
 * the sink is named {@code "CsvSink"}. Nothing is registered in {@code sets}.
 *
 * @param info operation descriptor supplying the parent id and the sink settings
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
@SuppressWarnings("unchecked")
private void createCsvSink(PythonOperationInfo info) throws IOException {
    DataSet source = (DataSet) sets.get(info.parentID);

    DataSet prepared = source.map(new StringTupleDeserializerMap())
            .setParallelism(getParallelism(info))
            .name("CsvSinkPreStep");

    prepared.writeAsCsv(info.path, info.lineDelimiter, info.fieldDelimiter, info.writeMode)
            .setParallelism(getParallelism(info))
            .name("CsvSink");
}
use of org.apache.flink.api.java.DataSet in project flink by apache.
the class PythonPlanBinder method createJoinOperation.
/**
 * Joins two previously registered data sets and stores the result.
 *
 * <p>The inputs are looked up in {@code sets} under {@code info.parentID} and
 * {@code info.otherID} and joined through {@code createDefaultJoin} on
 * {@code info.keys1} / {@code info.keys2}. When {@code info.usesUDF} is set, the
 * join output is additionally passed through a {@code PythonMapPartition} named
 * {@code info.name}; otherwise the plain join result is stored. Either way the
 * result is registered under {@code info.setID}.
 *
 * @param mode size hint forwarded unchanged to {@code createDefaultJoin}
 * @param info operation descriptor supplying input ids, keys, UDF flag and target id
 */
@SuppressWarnings("unchecked")
private void createJoinOperation(DatasizeHint mode, PythonOperationInfo info) {
    DataSet left = (DataSet) sets.get(info.parentID);
    DataSet right = (DataSet) sets.get(info.otherID);

    // Both branches start from the same default join.
    DataSet joined = createDefaultJoin(left, right, info.keys1, info.keys2, mode, getParallelism(info));

    if (!info.usesUDF) {
        sets.put(info.setID, joined);
        return;
    }

    DataSet withUdf = joined.mapPartition(new PythonMapPartition(info.setID, info.types))
            .setParallelism(getParallelism(info))
            .name(info.name);
    sets.put(info.setID, withUdf);
}
use of org.apache.flink.api.java.DataSet in project flink by apache.
the class PythonPlanBinder method createBroadcastVariable.
/**
 * Attaches a broadcast set to a previously registered UDF operator and records
 * its name in the operator's parameters.
 *
 * <p>The operator is looked up in {@code sets} under {@code info.parentID}, the
 * broadcast data set under {@code info.otherID}. After calling
 * {@code withBroadcastSet}, the operator's {@code Configuration} (or a fresh one if
 * {@code getParameters()} returns {@code null}) is updated: the counter stored
 * under {@code PLANBINDER_CONFIG_BCVAR_COUNT} is incremented, and
 * {@code info.name} is stored under {@code PLANBINDER_CONFIG_BCVAR_NAME_PREFIX}
 * followed by the previous counter value. The configuration is then handed back
 * via {@code withParameters}.
 *
 * @param info operation descriptor supplying the operator id, broadcast set id
 *             and broadcast variable name
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private void createBroadcastVariable(PythonOperationInfo info) throws IOException {
    UdfOperator<?> target = (UdfOperator) sets.get(info.parentID);
    DataSet<?> broadcastSet = (DataSet) sets.get(info.otherID);
    target.withBroadcastSet(broadcastSet, info.name);

    Configuration existing = target.getParameters();
    Configuration config = (existing != null) ? existing : new Configuration();

    // The current count doubles as the index of the newly added variable.
    int index = config.getInteger(PLANBINDER_CONFIG_BCVAR_COUNT, 0);
    config.setInteger(PLANBINDER_CONFIG_BCVAR_COUNT, index + 1);
    config.setString(PLANBINDER_CONFIG_BCVAR_NAME_PREFIX + index, info.name);

    target.withParameters(config);
}
Aggregations