Search in sources :

Example 6 with MISTStream

use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.

the class OperatorChainDagGeneratorTest method testSequentialChaining.

/**
 * Test sequential chaining.
 * logical dag:
 * src1 -> op11 -> op12 -> op13 -> sink1
 *
 * should be converted to the expected chained dag:
 * src1 -> [op11 -> op12 -> op13] -> sink1
 *
 * Each assertion passes the expected edge map first and the actual one second,
 * following the JUnit {@code assertEquals(expected, actual)} convention so that
 * failure messages report the values the right way round.
 */
@Test
public void testSequentialChaining() throws InjectionException {
    // Build the logical query: one source, three chained filters, one sink.
    final MISTQueryBuilder queryBuilder = new MISTQueryBuilder();
    final ContinuousStream<String> src1 = queryBuilder.socketTextStream(TestParameters.LOCAL_TEXT_SOCKET_SOURCE_CONF);
    final ContinuousStream<String> op11 = src1.filter((x) -> true);
    final ContinuousStream<String> op12 = op11.filter((x) -> true);
    final ContinuousStream<String> op13 = op12.filter((x) -> true);
    final MISTStream<String> sink1 = op13.textSocketOutput(TestParameters.HOST, TestParameters.SINK_PORT);
    queryBuilder.setApplicationId(TestParameters.SUPER_GROUP_ID);
    final MISTQuery query = queryBuilder.build();
    // Generate the operator-chain DAG from the logical DAG.
    final DAG<MISTStream, MISTEdge> dag = query.getDAG();
    final OperatorChainDagGenerator chainDagGenerator = new OperatorChainDagGenerator(dag);
    final DAG<List<MISTStream>, MISTEdge> operatorChainDag = chainDagGenerator.generateOperatorChainDAG();
    // Expected chain vertices
    final List<MISTStream> src1List = Arrays.asList(src1);
    final List<MISTStream> opList = Arrays.asList(op11, op12, op13);
    final List<MISTStream> sinkList = Arrays.asList(sink1);
    // Check src1 -> [op11->op12->op13] edge
    final Map<List<MISTStream>, MISTEdge> actual1 = operatorChainDag.getEdges(src1List);
    final Map<List<MISTStream>, MISTEdge> expected1 = new HashMap<>();
    expected1.put(opList, new MISTEdge(Direction.LEFT));
    Assert.assertEquals(expected1, actual1);
    // Check [op11->op12->op13] -> [sink1] edge
    final Map<List<MISTStream>, MISTEdge> actual2 = operatorChainDag.getEdges(opList);
    final Map<List<MISTStream>, MISTEdge> expected2 = new HashMap<>();
    expected2.put(sinkList, new MISTEdge(Direction.LEFT));
    Assert.assertEquals(expected2, actual2);
}
Also used : HashMap(java.util.HashMap) List(java.util.List) MISTStream(edu.snu.mist.client.datastreams.MISTStream) MISTEdge(edu.snu.mist.common.graph.MISTEdge) Test(org.junit.Test)

Example 7 with MISTStream

use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.

the class OperatorChainDagGeneratorTest method testForkAndMergeChaining.

/**
 * Verifies chaining of a query that forks after one operator and merges back
 * through two unions.
 * logical dag:
 *             -> opB-1 ->
 * src1 -> opA -> opB-2 -> opC ---> opD -> sink1
 *             -> opB-3 ---------->
 *
 * expected chained dag (every operator ends up in its own chain):
 *               -> [opB-1] ->
 * src1 -> [opA] -> [opB-2] -> [opC] ---> [opD] -> sink1
 *               -> [opB-3] ------------>
 */
@Test
public void testForkAndMergeChaining() throws InjectionException {
    // Build the logical query.
    final MISTQueryBuilder builder = new MISTQueryBuilder();
    final ContinuousStream<String> source = builder.socketTextStream(TestParameters.LOCAL_TEXT_SOCKET_SOURCE_CONF);
    final ContinuousStream<String> head = source.filter((x) -> true);
    final ContinuousStream<String> branch1 = head.filter((x) -> true);
    final ContinuousStream<String> branch2 = head.filter((x) -> true);
    final ContinuousStream<String> branch3 = head.filter((x) -> true);
    final ContinuousStream<String> firstUnion = branch2.union(branch1);
    final ContinuousStream<String> secondUnion = firstUnion.union(branch3);
    final MISTStream<String> sink = secondUnion.textSocketOutput(TestParameters.HOST, TestParameters.SINK_PORT);
    builder.setApplicationId(TestParameters.SUPER_GROUP_ID);
    final MISTQuery builtQuery = builder.build();

    // Convert the logical DAG into the operator-chain DAG.
    final DAG<MISTStream, MISTEdge> logicalDag = builtQuery.getDAG();
    final OperatorChainDagGenerator generator = new OperatorChainDagGenerator(logicalDag);
    final DAG<List<MISTStream>, MISTEdge> chainDag = generator.generateOperatorChainDAG();

    // Chain vertices we expect to find in the generated DAG.
    final List<MISTStream> sourceChain = Arrays.asList(source);
    final List<MISTStream> headChain = Arrays.asList(head);
    final List<MISTStream> branch1Chain = Arrays.asList(branch1);
    final List<MISTStream> branch2Chain = Arrays.asList(branch2);
    final List<MISTStream> branch3Chain = Arrays.asList(branch3);
    final List<MISTStream> firstUnionChain = Arrays.asList(firstUnion);
    final List<MISTStream> secondUnionChain = Arrays.asList(secondUnion);
    final List<MISTStream> sinkChain = Arrays.asList(sink);

    // src1 -> [opA]
    final Map<List<MISTStream>, MISTEdge> fromSource = new HashMap<>();
    fromSource.put(headChain, new MISTEdge(Direction.LEFT));
    Assert.assertEquals(fromSource, chainDag.getEdges(sourceChain));

    // [opA] fans out to [opB-1], [opB-2] and [opB-3]
    final Map<List<MISTStream>, MISTEdge> fromHead = new HashMap<>();
    fromHead.put(branch1Chain, new MISTEdge(Direction.LEFT));
    fromHead.put(branch2Chain, new MISTEdge(Direction.LEFT));
    fromHead.put(branch3Chain, new MISTEdge(Direction.LEFT));
    Assert.assertEquals(fromHead, chainDag.getEdges(headChain));

    // [opB-1] -> [opC], arriving as the right input of the union
    final Map<List<MISTStream>, MISTEdge> fromBranch1 = new HashMap<>();
    fromBranch1.put(firstUnionChain, new MISTEdge(Direction.RIGHT));
    Assert.assertEquals(fromBranch1, chainDag.getEdges(branch1Chain));

    // [opB-2] -> [opC], arriving as the left input of the union
    final Map<List<MISTStream>, MISTEdge> fromBranch2 = new HashMap<>();
    fromBranch2.put(firstUnionChain, new MISTEdge(Direction.LEFT));
    Assert.assertEquals(fromBranch2, chainDag.getEdges(branch2Chain));

    // [opC] -> [opD], arriving as the left input of the union
    final Map<List<MISTStream>, MISTEdge> fromFirstUnion = new HashMap<>();
    fromFirstUnion.put(secondUnionChain, new MISTEdge(Direction.LEFT));
    Assert.assertEquals(fromFirstUnion, chainDag.getEdges(firstUnionChain));

    // [opB-3] -> [opD], arriving as the right input of the union
    final Map<List<MISTStream>, MISTEdge> fromBranch3 = new HashMap<>();
    fromBranch3.put(secondUnionChain, new MISTEdge(Direction.RIGHT));
    Assert.assertEquals(fromBranch3, chainDag.getEdges(branch3Chain));

    // [opD] -> [sink1]
    final Map<List<MISTStream>, MISTEdge> fromSecondUnion = new HashMap<>();
    fromSecondUnion.put(sinkChain, new MISTEdge(Direction.LEFT));
    Assert.assertEquals(fromSecondUnion, chainDag.getEdges(secondUnionChain));
}
Also used : HashMap(java.util.HashMap) List(java.util.List) MISTStream(edu.snu.mist.client.datastreams.MISTStream) MISTEdge(edu.snu.mist.common.graph.MISTEdge) Test(org.junit.Test)

Example 8 with MISTStream

use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.

the class LogicalDagOptimizer method optimizeSubDag.

/**
 * Optimize the operators and sinks recursively (DFS order) according to the mechanism.
 *
 * For a vertex that is not a {@code ContinuousStreamImpl}, or whose conditional branch
 * count is zero, this simply recurses into every downstream vertex. Otherwise the
 * downstream branch streams (those with a branch index greater than zero) are replaced
 * by one new vertex that carries the list of their condition UDFs, and the edges that
 * left each branch stream are re-attached to that new vertex, tagged with the branch index.
 *
 * @param currVertex  current vertex
 * @param visited visited vertices; the current vertex is added to this set
 * @throws RuntimeException if the list of branch UDFs cannot be serialized
 */
private void optimizeSubDag(final MISTStream currVertex, final Set<MISTStream> visited) {
    // Set.add returns false when the vertex was already visited.
    if (!visited.add(currVertex)) {
        return;
    }
    final Map<MISTStream, MISTEdge> edges = dag.getEdges(currVertex);
    // checking whether there is any conditionally branching edge diverged from current vertex
    if (!(currVertex instanceof ContinuousStreamImpl) || ((ContinuousStreamImpl) currVertex).getCondBranchCount() == 0) {
        // current vertex is not a continuous stream or this edge is an ordinary (non-branch) edge
        for (final MISTStream nextVertex : edges.keySet()) {
            optimizeSubDag(nextVertex, visited);
        }
        return;
    }
    // current vertex has some conditionally branching edges
    final Map<Integer, ContinuousStreamImpl> branchStreams = new HashMap<>();
    // gather the branching streams
    for (final MISTStream nextVertex : edges.keySet()) {
        if (nextVertex instanceof ContinuousStreamImpl) {
            final ContinuousStreamImpl contNextVertex = (ContinuousStreamImpl) nextVertex;
            if (contNextVertex.getBranchIndex() > 0) {
                // this edge is a conditionally branching edge
                branchStreams.put(contNextVertex.getBranchIndex(), contNextVertex);
            }
        }
        // the downstream sub-DAG is optimized before the branches of this vertex are merged
        optimizeSubDag(nextVertex, visited);
    }
    // gather condition udfs from each branch stream, ordered by branch index
    // NOTE(review): assumes branch indices are exactly 1..branchStreams.size();
    // a gap would make get(i) return null and fail below - confirm against the stream builder.
    final List<String> udfs = new ArrayList<>(branchStreams.size());
    for (int i = 1; i <= branchStreams.size(); i++) {
        final ContinuousStreamImpl branchStream = branchStreams.get(i);
        final Map<String, String> conf = branchStream.getConfiguration();
        udfs.add(conf.get(ConfKeys.OperatorConf.UDF_STRING.name()));
    }
    // create a new conditional branch vertex to unify these branch streams
    final Map<String, String> opConf = new HashMap<>();
    try {
        opConf.put(ConfKeys.ConditionalBranchOperator.UDF_LIST_STRING.name(), SerializeUtils.serializeToString((Serializable) udfs));
    } catch (final IOException e) {
        // The cause is preserved in the rethrown exception, so no separate stack trace dump is needed.
        throw new RuntimeException(e);
    }
    final ContinuousStreamImpl unifiedBranchStream = new ContinuousStreamImpl(dag, opConf);
    dag.addVertex(unifiedBranchStream);
    dag.addEdge(currVertex, unifiedBranchStream, new MISTEdge(Direction.LEFT));
    // merging all the branching vertices from the current vertex into a single conditional branch vertex
    for (final ContinuousStreamImpl branchStream : branchStreams.values()) {
        final List<MISTStream> branchDownStreams = new ArrayList<>();
        for (final Map.Entry<MISTStream, MISTEdge> edgeFromBranch : dag.getEdges(branchStream).entrySet()) {
            final MISTStream branchDownStream = edgeFromBranch.getKey();
            branchDownStreams.add(branchDownStream);
            // keep the original direction, and record which branch this edge came from
            dag.addEdge(unifiedBranchStream, branchDownStream, new MISTEdge(edgeFromBranch.getValue().getDirection(), branchStream.getBranchIndex()));
        }
        // to prevent the concurrent map modification, remove the edges from downStream separately
        for (final MISTStream branchDownStream : branchDownStreams) {
            dag.removeEdge(branchStream, branchDownStream);
        }
        dag.removeEdge(currVertex, branchStream);
        dag.removeVertex(branchStream);
    }
}
Also used : ContinuousStreamImpl(edu.snu.mist.client.datastreams.ContinuousStreamImpl) Serializable(java.io.Serializable) IOException(java.io.IOException) MISTStream(edu.snu.mist.client.datastreams.MISTStream) MISTEdge(edu.snu.mist.common.graph.MISTEdge)

Example 9 with MISTStream

use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.

the class LogicalDagOptimizer method getOptimizedDAG.

/**
 * Runs the optimization pass over the logical DAG, starting from the vertices
 * that directly follow each root (source) vertex.
 * @return the DAG held by this optimizer, after optimization has been applied to it
 */
public DAG<MISTStream, MISTEdge> getOptimizedDAG() {
    // vertices already handled; shared across all roots
    final Set<MISTStream> seen = new HashSet<>();
    for (final MISTStream root : dag.getRootVertices()) {
        // roots themselves are only marked as visited; optimization begins at their successors
        seen.add(root);
        for (final MISTStream successor : dag.getEdges(root).keySet()) {
            optimizeSubDag(successor, seen);
        }
    }
    return dag;
}
Also used : MISTStream(edu.snu.mist.client.datastreams.MISTStream) MISTEdge(edu.snu.mist.common.graph.MISTEdge)

Example 10 with MISTStream

use of edu.snu.mist.client.datastreams.MISTStream in project mist by snuspl.

the class MISTQueryImpl method getAvroOperatorDag.

/**
 * Optimizes the logical DAG and converts it into Avro-serializable vertices and edges.
 *
 * Vertices are listed in the order produced by {@code GraphUtils.topologicalSort}; each
 * edge refers to its endpoints by their position in that list. Vertex ids are taken from
 * the {@code vertexIdIndex} field, which is incremented once per serialized vertex.
 *
 * @return a tuple of the serialized vertices and the serialized edges
 */
@Override
public Tuple<List<AvroVertex>, List<Edge>> getAvroOperatorDag() {
    final LogicalDagOptimizer logicalDagOptimizer = new LogicalDagOptimizer(dag);
    final DAG<MISTStream, MISTEdge> optimizedDag = logicalDagOptimizer.getOptimizedDAG();
    // Collect all vertices in topological order
    final List<MISTStream> vertices = new ArrayList<>();
    final Iterator<MISTStream> iterator = GraphUtils.topologicalSort(optimizedDag);
    while (iterator.hasNext()) {
        vertices.add(iterator.next());
    }
    // Visit each vertex in that same order and serialize its edges
    final List<Edge> edges = new ArrayList<>();
    for (int fromIndex = 0; fromIndex < vertices.size(); fromIndex++) {
        final Map<MISTStream, MISTEdge> neighbors = optimizedDag.getEdges(vertices.get(fromIndex));
        for (final Map.Entry<MISTStream, MISTEdge> neighbor : neighbors.entrySet()) {
            final int toIndex = vertices.indexOf(neighbor.getKey());
            final MISTEdge edgeInfo = neighbor.getValue();
            final Edge.Builder edgeBuilder = Edge.newBuilder().setFrom(fromIndex).setTo(toIndex).setDirection(edgeInfo.getDirection()).setBranchIndex(edgeInfo.getIndex());
            edges.add(edgeBuilder.build());
        }
    }
    final Set<MISTStream> rootVertices = optimizedDag.getRootVertices();
    // Serialize each vertex via avro.
    final List<AvroVertex> serializedVertices = new ArrayList<>();
    for (final MISTStream vertex : vertices) {
        final AvroVertex.Builder vertexBuilder = AvroVertex.newBuilder();
        vertexBuilder.setConfiguration(vertex.getConfiguration());
        vertexBuilder.setVertexId(String.valueOf(vertexIdIndex));
        // Set vertex type
        if (rootVertices.contains(vertex)) {
            // this is a source
            vertexBuilder.setAvroVertexType(AvroVertexTypeEnum.SOURCE);
        } else if (optimizedDag.getEdges(vertex).isEmpty()) {
            // no outgoing edges: this is a sink
            vertexBuilder.setAvroVertexType(AvroVertexTypeEnum.SINK);
        } else {
            vertexBuilder.setAvroVertexType(AvroVertexTypeEnum.OPERATOR);
        }
        serializedVertices.add(vertexBuilder.build());
        vertexIdIndex++;
    }
    return new Tuple<>(serializedVertices, edges);
}
Also used : AvroVertex(edu.snu.mist.formats.avro.AvroVertex) MISTStream(edu.snu.mist.client.datastreams.MISTStream) Edge(edu.snu.mist.formats.avro.Edge) MISTEdge(edu.snu.mist.common.graph.MISTEdge) MISTEdge(edu.snu.mist.common.graph.MISTEdge) Tuple(org.apache.reef.io.Tuple)

Aggregations

MISTStream (edu.snu.mist.client.datastreams.MISTStream)11 MISTEdge (edu.snu.mist.common.graph.MISTEdge)11 HashMap (java.util.HashMap)6 List (java.util.List)5 Test (org.junit.Test)5 ContinuousStreamImpl (edu.snu.mist.client.datastreams.ContinuousStreamImpl)2 AdjacentListDAG (edu.snu.mist.common.graph.AdjacentListDAG)1 AvroVertex (edu.snu.mist.formats.avro.AvroVertex)1 Edge (edu.snu.mist.formats.avro.Edge)1 IOException (java.io.IOException)1 Serializable (java.io.Serializable)1 Tuple (org.apache.reef.io.Tuple)1