use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.
the class OperatorChainDagGeneratorTest method testSequentialChaining.
/**
 * Test sequential chaining.
 * logical dag:
 * src1 -> op11 -> op12 -> op13 -> sink1
 *
 * should be converted to the expected chained dag:
 * src1 -> [op11 -> op12 -> op13] -> sink1
 *
 * @throws InjectionException declared by the test signature; which of the calls below raises it
 *                            is not visible from this method
 */
@Test
public void testSequentialChaining() throws InjectionException {
  // Build the logical query: one source, three chained filters, one sink.
  final MISTQueryBuilder queryBuilder = new MISTQueryBuilder();
  final ContinuousStream<String> src1 = queryBuilder.socketTextStream(TestParameters.LOCAL_TEXT_SOCKET_SOURCE_CONF);
  final ContinuousStream<String> op11 = src1.filter((x) -> true);
  final ContinuousStream<String> op12 = op11.filter((x) -> true);
  final ContinuousStream<String> op13 = op12.filter((x) -> true);
  final MISTStream<String> sink1 = op13.textSocketOutput(TestParameters.HOST, TestParameters.SINK_PORT);
  queryBuilder.setApplicationId(TestParameters.SUPER_GROUP_ID);
  final MISTQuery query = queryBuilder.build();
  final DAG<MISTStream, MISTEdge> dag = query.getDAG();

  // Convert the logical DAG into the operator chain DAG under test.
  final OperatorChainDagGenerator chainDagGenerator = new OperatorChainDagGenerator(dag);
  final DAG<List<MISTStream>, MISTEdge> operatorChainDag = chainDagGenerator.generateOperatorChainDAG();

  // Expected chains: the three filters are expected to collapse into a single chain.
  final List<MISTStream> src1List = Arrays.asList(src1);
  final List<MISTStream> opList = Arrays.asList(op11, op12, op13);
  final List<MISTStream> sinkList = Arrays.asList(sink1);

  // Check src1 -> [op11->op12->op13] edge
  final Map<List<MISTStream>, MISTEdge> e1 = operatorChainDag.getEdges(src1List);
  final Map<List<MISTStream>, MISTEdge> result1 = new HashMap<>();
  result1.put(opList, new MISTEdge(Direction.LEFT));
  // JUnit expects (expected, actual); keeping that order makes failure messages read correctly.
  Assert.assertEquals(result1, e1);

  // Check [op11->op12->op13] -> [sink1] edge
  final Map<List<MISTStream>, MISTEdge> e2 = operatorChainDag.getEdges(opList);
  final Map<List<MISTStream>, MISTEdge> result2 = new HashMap<>();
  result2.put(sinkList, new MISTEdge(Direction.LEFT));
  Assert.assertEquals(result2, e2);
}
use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.
the class OperatorChainDagGeneratorTest method testForkAndMergeChaining.
/**
 * Verifies chaining for a logical DAG that forks and then merges.
 * logical dag:
 *              -> opB-1 ->
 * src1 -> opA -> opB-2 -> opC ---> opD -> sink1
 *              -> opB-3 ---------->
 *
 * expected chained dag (each operator stays in a chain of its own):
 *                -> [opB-1] ->
 * src1 -> [opA] -> [opB-2] -> [opC] ---> [opD] -> sink1
 *                -> [opB-3] ------------>
 */
@Test
public void testForkAndMergeChaining() throws InjectionException {
  // Logical query: opA fans out into three filters, which two unions merge back together.
  final MISTQueryBuilder builder = new MISTQueryBuilder();
  final ContinuousStream<String> source = builder.socketTextStream(TestParameters.LOCAL_TEXT_SOCKET_SOURCE_CONF);
  final ContinuousStream<String> filterA = source.filter(x -> true);
  final ContinuousStream<String> filterB1 = filterA.filter(x -> true);
  final ContinuousStream<String> filterB2 = filterA.filter(x -> true);
  final ContinuousStream<String> filterB3 = filterA.filter(x -> true);
  final ContinuousStream<String> unionC = filterB2.union(filterB1);
  final ContinuousStream<String> unionD = unionC.union(filterB3);
  final MISTStream<String> sink = unionD.textSocketOutput(TestParameters.HOST, TestParameters.SINK_PORT);
  builder.setApplicationId(TestParameters.SUPER_GROUP_ID);
  final DAG<MISTStream, MISTEdge> logicalDag = builder.build().getDAG();

  // Chain DAG under test
  final DAG<List<MISTStream>, MISTEdge> chainDag =
      new OperatorChainDagGenerator(logicalDag).generateOperatorChainDAG();

  // One single-element chain per vertex
  final List<MISTStream> sourceChain = Arrays.asList(source);
  final List<MISTStream> chainA = Arrays.asList(filterA);
  final List<MISTStream> chainB1 = Arrays.asList(filterB1);
  final List<MISTStream> chainB2 = Arrays.asList(filterB2);
  final List<MISTStream> chainB3 = Arrays.asList(filterB3);
  final List<MISTStream> chainC = Arrays.asList(unionC);
  final List<MISTStream> chainD = Arrays.asList(unionD);
  final List<MISTStream> sinkChain = Arrays.asList(sink);

  // src1 -> [opA]
  final Map<List<MISTStream>, MISTEdge> expectedFromSource = new HashMap<>();
  expectedFromSource.put(chainA, new MISTEdge(Direction.LEFT));
  Assert.assertEquals(expectedFromSource, chainDag.getEdges(sourceChain));

  // [opA] -> [opB-1], [opB-2], [opB-3]
  final Map<List<MISTStream>, MISTEdge> expectedFromA = new HashMap<>();
  expectedFromA.put(chainB1, new MISTEdge(Direction.LEFT));
  expectedFromA.put(chainB2, new MISTEdge(Direction.LEFT));
  expectedFromA.put(chainB3, new MISTEdge(Direction.LEFT));
  Assert.assertEquals(expectedFromA, chainDag.getEdges(chainA));

  // [opB-1] -> [opC]: opB-1 is the argument of the first union, hence the RIGHT input
  final Map<List<MISTStream>, MISTEdge> expectedFromB1 = new HashMap<>();
  expectedFromB1.put(chainC, new MISTEdge(Direction.RIGHT));
  Assert.assertEquals(expectedFromB1, chainDag.getEdges(chainB1));

  // [opB-2] -> [opC]: opB-2 is the receiver of the first union, hence the LEFT input
  final Map<List<MISTStream>, MISTEdge> expectedFromB2 = new HashMap<>();
  expectedFromB2.put(chainC, new MISTEdge(Direction.LEFT));
  Assert.assertEquals(expectedFromB2, chainDag.getEdges(chainB2));

  // [opC] -> [opD]
  final Map<List<MISTStream>, MISTEdge> expectedFromC = new HashMap<>();
  expectedFromC.put(chainD, new MISTEdge(Direction.LEFT));
  Assert.assertEquals(expectedFromC, chainDag.getEdges(chainC));

  // [opB-3] -> [opD]
  final Map<List<MISTStream>, MISTEdge> expectedFromB3 = new HashMap<>();
  expectedFromB3.put(chainD, new MISTEdge(Direction.RIGHT));
  Assert.assertEquals(expectedFromB3, chainDag.getEdges(chainB3));

  // [opD] -> [sink1]
  final Map<List<MISTStream>, MISTEdge> expectedFromD = new HashMap<>();
  expectedFromD.put(sinkChain, new MISTEdge(Direction.LEFT));
  Assert.assertEquals(expectedFromD, chainDag.getEdges(chainD));
}
use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.
the class LogicalDagOptimizer method optimizeSubDag.
/**
 * Optimizes the operators and sinks recursively (DFS order) according to the mechanism.
 *
 * A vertex that is not a {@code ContinuousStreamImpl}, or whose conditional branch count is zero,
 * is left untouched and only its downstream vertices are visited. Otherwise every downstream
 * {@code ContinuousStreamImpl} with a positive branch index is replaced by one new vertex that
 * carries the serialized list of their UDF strings; the outgoing edges of the replaced vertices
 * are re-attached to the new vertex and tagged with the original branch index.
 *
 * @param currVertex current vertex
 * @param visited visited vertices; the current vertex is added to it
 * @throws RuntimeException if serializing the UDF list fails with an {@code IOException}
 */
private void optimizeSubDag(final MISTStream currVertex, final Set<MISTStream> visited) {
  // Set.add returns false when the vertex was already recorded, i.e. already processed.
  if (!visited.add(currVertex)) {
    return;
  }
  final Map<MISTStream, MISTEdge> edges = dag.getEdges(currVertex);
  // checking whether there is any conditionally branching edge diverged from current vertex
  if (!(currVertex instanceof ContinuousStreamImpl) || ((ContinuousStreamImpl) currVertex).getCondBranchCount() == 0) {
    // current vertex is not a continuous stream or this edge is an ordinary (non-branch) edge
    for (final MISTStream nextVertex : edges.keySet()) {
      optimizeSubDag(nextVertex, visited);
    }
    return;
  }

  // current vertex has some conditionally branching edges
  final Map<Integer, ContinuousStreamImpl> branchStreams = new HashMap<>();
  // gather the branching streams; every downstream vertex is optimized first (DFS)
  for (final MISTStream nextVertex : edges.keySet()) {
    if (nextVertex instanceof ContinuousStreamImpl) {
      final ContinuousStreamImpl contNextVertex = (ContinuousStreamImpl) nextVertex;
      if (contNextVertex.getBranchIndex() > 0) {
        // this edge is a conditionally branching edge
        branchStreams.put(contNextVertex.getBranchIndex(), contNextVertex);
      }
    }
    optimizeSubDag(nextVertex, visited);
  }

  // gather condition udfs from each branch stream, ordered by branch index
  // NOTE(review): assumes the branch indices are exactly 1..branchStreams.size();
  // a gap would make get(i) return null and fail on the next line - confirm with the producer.
  final List<String> udfs = new ArrayList<>(branchStreams.size());
  for (int i = 1; i <= branchStreams.size(); i++) {
    final ContinuousStreamImpl branchStream = branchStreams.get(i);
    final Map<String, String> conf = branchStream.getConfiguration();
    udfs.add(conf.get(ConfKeys.OperatorConf.UDF_STRING.name()));
  }

  // create a new conditional branch vertex to unify these branch streams
  final Map<String, String> opConf = new HashMap<>();
  try {
    opConf.put(ConfKeys.ConditionalBranchOperator.UDF_LIST_STRING.name(), SerializeUtils.serializeToString((Serializable) udfs));
  } catch (final IOException e) {
    // The cause is preserved in the rethrown exception, so it is not printed here as well.
    throw new RuntimeException(e);
  }
  final ContinuousStreamImpl unifiedBranchStream = new ContinuousStreamImpl(dag, opConf);
  dag.addVertex(unifiedBranchStream);
  dag.addEdge(currVertex, unifiedBranchStream, new MISTEdge(Direction.LEFT));

  // merging all the branching vertices from the current vertex into a single conditional branch vertex
  for (final ContinuousStreamImpl branchStream : branchStreams.values()) {
    final List<MISTStream> branchDownStreams = new ArrayList<>();
    for (final Map.Entry<MISTStream, MISTEdge> edgeFromBranch : dag.getEdges(branchStream).entrySet()) {
      final MISTStream branchDownStream = edgeFromBranch.getKey();
      branchDownStreams.add(branchDownStream);
      // keep the original direction and record which branch the edge came from
      dag.addEdge(unifiedBranchStream, branchDownStream, new MISTEdge(edgeFromBranch.getValue().getDirection(), branchStream.getBranchIndex()));
    }
    // to prevent the concurrent map modification, remove the edges from downStream separately
    for (final MISTStream branchDownStream : branchDownStreams) {
      dag.removeEdge(branchStream, branchDownStream);
    }
    dag.removeEdge(currVertex, branchStream);
    dag.removeVertex(branchStream);
  }
}
use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.
the class LogicalDagOptimizer method getOptimizedDAG.
/**
 * Runs the optimization pass over the logical DAG held by this optimizer.
 * Root vertices are only marked as visited; the pass itself starts at the vertices
 * directly downstream of each root.
 * @return the DAG held by this optimizer, after the pass has run
 */
public DAG<MISTStream, MISTEdge> getOptimizedDAG() {
  // vertices the traversal has already handled
  final Set<MISTStream> seen = new HashSet<>();
  for (final MISTStream root : dag.getRootVertices()) {
    seen.add(root);
    // start optimizing from the operators that follow this root
    for (final MISTStream child : dag.getEdges(root).keySet()) {
      optimizeSubDag(child, seen);
    }
  }
  return dag;
}
use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.
the class MISTQueryImpl method getAvroOperatorDag.
/**
 * Optimizes the logical DAG and serializes it into Avro vertices and index-based edges.
 *
 * Vertices are listed in the order produced by {@code GraphUtils.topologicalSort}; each edge
 * refers to its endpoints by their position in that list. A vertex is typed SOURCE when it is
 * a root vertex of the optimized DAG, SINK when it has no outgoing edge, and OPERATOR otherwise.
 * Every serialized vertex consumes one value of the {@code vertexIdIndex} counter, which is
 * declared outside this method.
 *
 * @return a tuple of the serialized vertices and the serialized edges
 */
@Override
public Tuple<List<AvroVertex>, List<Edge>> getAvroOperatorDag() {
  final LogicalDagOptimizer logicalDagOptimizer = new LogicalDagOptimizer(dag);
  final DAG<MISTStream, MISTEdge> optimizedDag = logicalDagOptimizer.getOptimizedDAG();

  // Fix the vertex order and remember each vertex's position so that edge endpoints
  // can be resolved with a map lookup instead of a linear scan of the list.
  final List<MISTStream> vertices = new ArrayList<>();
  final Map<MISTStream, Integer> vertexIndices = new java.util.HashMap<>();
  final Iterator<MISTStream> iterator = GraphUtils.topologicalSort(optimizedDag);
  while (iterator.hasNext()) {
    final MISTStream vertex = iterator.next();
    // putIfAbsent keeps the first position, matching what List.indexOf would return
    vertexIndices.putIfAbsent(vertex, vertices.size());
    vertices.add(vertex);
  }

  // Visit each vertex in that order and serialize its edges
  final List<Edge> edges = new ArrayList<>();
  for (final MISTStream vertex : vertices) {
    final int fromIndex = vertexIndices.get(vertex);
    final Map<MISTStream, MISTEdge> neighbors = optimizedDag.getEdges(vertex);
    for (final Map.Entry<MISTStream, MISTEdge> neighbor : neighbors.entrySet()) {
      // -1 mirrors List.indexOf for a neighbor that is missing from the sorted vertices
      final int toIndex = vertexIndices.getOrDefault(neighbor.getKey(), -1);
      final MISTEdge edgeInfo = neighbor.getValue();
      final Edge.Builder edgeBuilder = Edge.newBuilder().setFrom(fromIndex).setTo(toIndex).setDirection(edgeInfo.getDirection()).setBranchIndex(edgeInfo.getIndex());
      edges.add(edgeBuilder.build());
    }
  }

  final Set<MISTStream> rootVertices = optimizedDag.getRootVertices();
  // Serialize each vertex via avro.
  final List<AvroVertex> serializedVertices = new ArrayList<>(vertices.size());
  for (final MISTStream vertex : vertices) {
    final AvroVertex.Builder vertexBuilder = AvroVertex.newBuilder();
    vertexBuilder.setConfiguration(vertex.getConfiguration());
    vertexBuilder.setVertexId(String.valueOf(vertexIdIndex));
    // Set vertex type
    if (rootVertices.contains(vertex)) {
      // this is a source
      vertexBuilder.setAvroVertexType(AvroVertexTypeEnum.SOURCE);
    } else if (optimizedDag.getEdges(vertex).isEmpty()) {
      // this is a sink
      vertexBuilder.setAvroVertexType(AvroVertexTypeEnum.SINK);
    } else {
      vertexBuilder.setAvroVertexType(AvroVertexTypeEnum.OPERATOR);
    }
    serializedVertices.add(vertexBuilder.build());
    vertexIdIndex++;
  }
  return new Tuple<>(serializedVertices, edges);
}
Aggregations