use of org.apache.flink.table.planner.plan.nodes.exec.InputProperty.HashDistribution in project flink by apache.
the class StreamExecExchange method translateToPlanInternal.
/**
 * Translates this streaming exchange node into a {@link PartitionTransformation} placed on top
 * of the transformation produced by the first input edge.
 *
 * <p>The partitioner and parallelism are chosen from the distribution type required by the
 * first input property: {@code SINGLETON} uses a {@link GlobalPartitioner} with parallelism 1,
 * {@code HASH} uses a {@link KeyGroupStreamPartitioner} keyed on the distribution keys with the
 * default parallelism.
 *
 * @param planner the planner handed to the input edge when translating it
 * @param config the node configuration used to create the transformation meta
 * @return the partition transformation, with parallelism and output type set
 * @throws TableException if the required distribution type is neither SINGLETON nor HASH
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final Transformation<RowData> upstream =
            (Transformation<RowData>) getInputEdges().get(0).translateToPlan(planner);
    final InputProperty requiredProperty = getInputProperties().get(0);
    final InputProperty.DistributionType requiredType =
            requiredProperty.getRequiredDistribution().getType();

    final StreamPartitioner<RowData> exchangePartitioner;
    final int exchangeParallelism;
    switch (requiredType) {
        case HASH: {
            // TODO Eliminate duplicate keys
            final int[] hashKeys =
                    ((HashDistribution) requiredProperty.getRequiredDistribution()).getKeys();
            final InternalTypeInfo<RowData> upstreamTypeInfo =
                    (InternalTypeInfo<RowData>) upstream.getOutputType();
            final RowDataKeySelector selector =
                    KeySelectorUtil.getRowDataSelector(hashKeys, upstreamTypeInfo);
            exchangePartitioner =
                    new KeyGroupStreamPartitioner<>(selector, DEFAULT_LOWER_BOUND_MAX_PARALLELISM);
            exchangeParallelism = ExecutionConfig.PARALLELISM_DEFAULT;
            break;
        }
        case SINGLETON:
            exchangePartitioner = new GlobalPartitioner<>();
            exchangeParallelism = 1;
            break;
        default:
            throw new TableException(String.format("%s is not supported now!", requiredType));
    }

    final Transformation<RowData> exchange =
            new PartitionTransformation<>(upstream, exchangePartitioner);
    createTransformationMeta(EXCHANGE_TRANSFORMATION, config).fill(exchange);
    exchange.setParallelism(exchangeParallelism);
    exchange.setOutputType(InternalTypeInfo.of(getOutputType()));
    return exchange;
}
use of org.apache.flink.table.planner.plan.nodes.exec.InputProperty.HashDistribution in project flink by apache.
the class BatchExecExchange method getHashDistributionDescription.
/**
 * Renders the hash keys of the given distribution as a bracketed, comma-separated list of the
 * corresponding field names of the first input edge's row type, e.g. {@code [a, b]}.
 *
 * @param hashDistribution the distribution whose key indices are resolved to field names
 * @return the field names in key order, joined with {@code ", "} and wrapped in {@code [ ]}
 */
private String getHashDistributionDescription(HashDistribution hashDistribution) {
    final RowType rowTypeOfInput = (RowType) getInputEdges().get(0).getOutputType();
    final StringBuilder rendered = new StringBuilder("[");
    // Empty before the first name, ", " before every following one.
    String separator = "";
    for (int keyIndex : hashDistribution.getKeys()) {
        rendered.append(separator).append(rowTypeOfInput.getFieldNames().get(keyIndex));
        separator = ", ";
    }
    return rendered.append("]").toString();
}
use of org.apache.flink.table.planner.plan.nodes.exec.InputProperty.HashDistribution in project flink by apache.
the class BatchExecExchange method translateToPlanInternal.
/**
 * Translates this batch exchange node into a {@link PartitionTransformation} placed on top of
 * the transformation produced by the first input edge.
 *
 * <p>The partitioner and parallelism are chosen from the distribution required by the first
 * input property:
 *
 * <ul>
 *   <li>{@code ANY}: no partitioner ({@code null}), default parallelism
 *   <li>{@code BROADCAST}: {@link BroadcastPartitioner}, default parallelism
 *   <li>{@code SINGLETON}: {@link GlobalPartitioner}, parallelism 1
 *   <li>{@code HASH}: partitioner from {@code createHashPartitioner}, default parallelism
 *   <li>{@code KEEP_INPUT_AS_IS}: {@link ForwardPartitioner} when strict, otherwise a
 *       {@link ForwardForConsecutiveHashPartitioner} wrapping a hash partitioner built from the
 *       input distribution; the parallelism of the input transformation is reused
 * </ul>
 *
 * @param planner the planner handed to the input edge when translating it
 * @param config the node configuration passed on to the hash partitioner and exchange mode
 *     lookups
 * @return the partition transformation, with parallelism and output type set
 * @throws TableException if the required distribution type is not one of the types listed above
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputType = (RowType) inputEdge.getOutputType();
    // Set only by the strict KEEP_INPUT_AS_IS branch; forces StreamExchangeMode.UNDEFINED below.
    boolean requireUndefinedExchangeMode = false;
    final StreamPartitioner<RowData> partitioner;
    final int parallelism;
    final InputProperty inputProperty = getInputProperties().get(0);
    final RequiredDistribution requiredDistribution = inputProperty.getRequiredDistribution();
    final InputProperty.DistributionType distributionType = requiredDistribution.getType();
    switch (distributionType) {
        case ANY:
            partitioner = null;
            parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
            break;
        case BROADCAST:
            partitioner = new BroadcastPartitioner<>();
            parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
            break;
        case SINGLETON:
            partitioner = new GlobalPartitioner<>();
            parallelism = 1;
            break;
        case HASH:
            partitioner =
                    createHashPartitioner((HashDistribution) requiredDistribution, inputType, config);
            parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
            break;
        case KEEP_INPUT_AS_IS:
            KeepInputAsIsDistribution keepInputAsIsDistribution =
                    (KeepInputAsIsDistribution) requiredDistribution;
            if (keepInputAsIsDistribution.isStrict()) {
                // explicitly use ForwardPartitioner to guarantee the data distribution is
                // exactly the same as input
                partitioner = new ForwardPartitioner<>();
                requireUndefinedExchangeMode = true;
            } else {
                RequiredDistribution inputDistribution =
                        keepInputAsIsDistribution.getInputDistribution();
                checkArgument(
                        inputDistribution instanceof HashDistribution,
                        "Only HashDistribution is supported now");
                partitioner =
                        new ForwardForConsecutiveHashPartitioner<>(
                                createHashPartitioner(
                                        (HashDistribution) inputDistribution, inputType, config));
            }
            parallelism = inputTransform.getParallelism();
            break;
        default:
            // The type name and the rest of the sentence are separated by a space.
            throw new TableException(
                    String.format("%s is not supported now!", distributionType));
    }
    final StreamExchangeMode exchangeMode =
            requireUndefinedExchangeMode
                    ? StreamExchangeMode.UNDEFINED
                    : getBatchStreamExchangeMode(config, requiredExchangeMode);
    final Transformation<RowData> transformation =
            new PartitionTransformation<>(inputTransform, partitioner, exchangeMode);
    transformation.setParallelism(parallelism);
    transformation.setOutputType(InternalTypeInfo.of(getOutputType()));
    return transformation;
}
use of org.apache.flink.table.planner.plan.nodes.exec.InputProperty.HashDistribution in project flink by apache.
the class BatchExecExchange method getDescription.
/**
 * Builds the description of this exchange node in the form
 * {@code Exchange(distribution=[...])}, optionally followed by {@code , shuffle_mode=[BATCH]}
 * inside the parentheses when the required exchange mode is {@code BATCH}.
 *
 * <p>The distribution label is the lower-cased distribution type name, except that
 * {@code singleton} is shown as {@code single} and a strict keep-input-as-is distribution is
 * shown as {@code forward}. Hash distributions append their key field names; a non-strict
 * keep-input-as-is distribution appends {@code [hash[...]]} for its input distribution.
 *
 * @return the description string for this node
 */
@Override
public String getDescription() {
    // make sure the description be consistent with before, update this once plan is stable
    RequiredDistribution requiredDistribution = getInputProperties().get(0).getRequiredDistribution();
    StringBuilder sb = new StringBuilder();
    // Lower-case with Locale.ROOT so the result does not depend on the JVM default locale
    // (e.g. a Turkish locale maps 'I' to a dotless 'ı', which would break the comparison below).
    String type = requiredDistribution.getType().name().toLowerCase(java.util.Locale.ROOT);
    if (type.equals("singleton")) {
        type = "single";
    } else if (requiredDistribution instanceof KeepInputAsIsDistribution
            && ((KeepInputAsIsDistribution) requiredDistribution).isStrict()) {
        type = "forward";
    }
    sb.append("distribution=[").append(type);
    if (requiredDistribution instanceof HashDistribution) {
        sb.append(getHashDistributionDescription((HashDistribution) requiredDistribution));
    } else if (requiredDistribution instanceof KeepInputAsIsDistribution
            && !((KeepInputAsIsDistribution) requiredDistribution).isStrict()) {
        KeepInputAsIsDistribution distribution = (KeepInputAsIsDistribution) requiredDistribution;
        sb.append("[hash")
                .append(getHashDistributionDescription((HashDistribution) distribution.getInputDistribution()))
                .append("]");
    }
    sb.append("]");
    if (requiredExchangeMode == StreamExchangeMode.BATCH) {
        sb.append(", shuffle_mode=[BATCH]");
    }
    return String.format("Exchange(%s)", sb);
}
use of org.apache.flink.table.planner.plan.nodes.exec.InputProperty.HashDistribution in project flink by apache.
the class RequiredDistributionJsonSerializer method serialize.
/**
 * Writes the given required distribution as a JSON object.
 *
 * <p>The object always carries a {@code "type"} field holding the distribution type name. For
 * {@code HASH} distributions a {@code "keys"} array with the hash key indices is written as
 * well; {@code ANY}, {@code SINGLETON}, {@code BROADCAST} and {@code UNKNOWN} need nothing
 * beyond the type name.
 *
 * @param requiredDistribution the distribution to serialize
 * @param jsonGenerator the generator the JSON output is written to
 * @param serializerProvider not used by this method
 * @throws IOException declared for the generator write calls
 * @throws TableException if the distribution type is not one of the types listed above
 */
@Override
public void serialize(RequiredDistribution requiredDistribution, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException {
    jsonGenerator.writeStartObject();
    final DistributionType distributionKind = requiredDistribution.getType();
    jsonGenerator.writeStringField("type", distributionKind.name());
    switch (distributionKind) {
        case HASH: {
            final HashDistribution hashed = (HashDistribution) requiredDistribution;
            jsonGenerator.writeFieldName("keys");
            // Write the whole key array: start at offset 0 and cover its full length.
            final int startOffset = 0;
            jsonGenerator.writeArray(hashed.getKeys(), startOffset, hashed.getKeys().length);
            break;
        }
        case ANY:
        case SINGLETON:
        case BROADCAST:
        case UNKNOWN:
            // the type name alone fully describes these distributions
            break;
        default:
            throw new TableException("Unsupported distribution type: " + distributionKind);
    }
    jsonGenerator.writeEndObject();
}
Aggregations