use of org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig in project tez by apache.
the class CartesianProduct method createDAG.
private DAG createDAG(TezConfiguration tezConf, String inputPath1, String inputPath2, String inputPath3, String outputPath, boolean isPartitioned) throws IOException {
Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
// turn off groupSplit so that each input file incurs one task
v1.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath1).groupSplits(false).build());
Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v2.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath2).groupSplits(false).build());
Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v3.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath3).groupSplits(false).build());
CartesianProductConfig cartesianProductConfig;
if (isPartitioned) {
Map<String, Integer> vertexPartitionMap = new HashMap<>();
for (String vertex : cpSources) {
vertexPartitionMap.put(vertex, numPartition);
}
cartesianProductConfig = new CartesianProductConfig(vertexPartitionMap);
} else {
cartesianProductConfig = new CartesianProductConfig(Arrays.asList(cpSources));
}
UserPayload userPayload = cartesianProductConfig.toUserPayload(tezConf);
Vertex v4 = Vertex.create(VERTEX4, ProcessorDescriptor.create(JoinProcessor.class.getName()));
v4.addDataSink(OUTPUT, MROutput.createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath).build());
v4.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName()).setUserPayload(userPayload));
EdgeManagerPluginDescriptor cpEdgeManager = EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
cpEdgeManager.setUserPayload(userPayload);
EdgeProperty cpEdgeProperty;
if (isPartitioned) {
UnorderedPartitionedKVEdgeConfig cpEdgeConf = UnorderedPartitionedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName(), CustomPartitioner.class.getName()).build();
cpEdgeProperty = cpEdgeConf.createDefaultCustomEdgeProperty(cpEdgeManager);
} else {
UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
cpEdgeProperty = edgeConf.createDefaultCustomEdgeProperty(cpEdgeManager);
}
EdgeProperty broadcastEdgeProperty;
UnorderedKVEdgeConfig broadcastEdgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
broadcastEdgeProperty = broadcastEdgeConf.createDefaultBroadcastEdgeProperty();
return DAG.create("CartesianProduct").addVertex(v1).addVertex(v2).addVertex(v3).addVertex(v4).addEdge(Edge.create(v1, v4, cpEdgeProperty)).addEdge(Edge.create(v2, v4, cpEdgeProperty)).addEdge(Edge.create(v3, v4, broadcastEdgeProperty));
}
Aggregations