Search in sources :

Example 1 with Value0Of2

use of org.gradoop.flink.model.impl.functions.tuple.Value0Of2 in project gradoop by dbs-leipzig.

The class GroupingGroupCombine, method groupInternal.

/**
 * Groups the vertices and edges of the given graph and assembles the resulting graph.
 *
 * <p>Vertices are mapped to vertex group items, combined per group, reduced to one super vertex
 * tuple per group and turned into super vertices. Edges are then handed to
 * {@code buildSuperEdges} together with the vertex-to-representative mapping.
 *
 * <p>If {@code isRetainingVerticesWithoutGroup()} is true, the subgraph returned by
 * {@code getSubgraphOfRetainedVertices} is built once and reused: its vertices are added to the
 * representative mapping and to the result, and its edges are excluded from edge grouping and
 * added to the result as they are.
 *
 * @param graph the input graph
 * @return the graph created from the super vertices and super edges (plus the retained subgraph
 *         when retention is enabled)
 */
@Override
protected LG groupInternal(LG graph) {
    // when retaining vertices without group, only vertices passing the label group filter are grouped
    DataSet<V> vertices = isRetainingVerticesWithoutGroup()
        ? graph.getVertices().filter(new LabelGroupFilter<>(getVertexLabelGroups(), useVertexLabels()))
        : graph.getVertices();

    // map vertex to vertex group item
    DataSet<VertexGroupItem> verticesForGrouping = vertices
        .flatMap(new BuildVertexGroupItem<>(useVertexLabels(), getVertexLabelGroups()));

    // group vertices by label / properties / both
    DataSet<VertexGroupItem> combinedVertexGroupItems = groupVertices(verticesForGrouping)
        .combineGroup(new CombineVertexGroupItems(useVertexLabels()));

    // filter super vertex tuples (1..n per partition/group)
    // group  super vertex tuples
    // create super vertex tuple (1 per group) + previous super vertex ids
    DataSet<Tuple2<VertexGroupItem, IdWithIdSet>> superVertexTuples =
        groupVertices(combinedVertexGroupItems.filter(new FilterSuperVertices()))
            .reduceGroup(new TransposeVertexGroupItems(useVertexLabels()));

    // build super vertices from super vertex tuples
    DataSet<V> superVertices = superVertexTuples
        .map(new Value0Of2<>())
        .map(new BuildSuperVertex<>(useVertexLabels(), graph.getFactory().getVertexFactory()));

    // extract mapping
    DataSet<IdWithIdSet> mapping = superVertexTuples.map(new Value1Of2<>());

    // filter non-candidates from combiner output
    // update their vertex representative according to the mapping
    DataSet<VertexWithSuperVertex> vertexToRepresentativeMap = combinedVertexGroupItems
        .filter(new FilterRegularVertices())
        .map(new BuildVertexWithSuperVertexBC())
        .withBroadcastSet(mapping, BuildVertexWithSuperVertexBC.BC_MAPPING);

    // plain grouping: every edge of the input graph takes part in edge grouping
    if (!isRetainingVerticesWithoutGroup()) {
        DataSet<E> superEdges = buildSuperEdges(
            graph.getFactory().getEdgeFactory(), graph.getEdges(), vertexToRepresentativeMap);
        return graph.getFactory().fromDataSets(superVertices, superEdges);
    }

    // build the retained subgraph a single time; it is needed for the mapping, the edge
    // subtraction and the final unions
    LG retainedVerticesSubgraph = getSubgraphOfRetainedVertices(graph);

    // To add support for grouped edges between retained vertices and supervertices,
    // vertices are their group representatives themselves
    vertexToRepresentativeMap = updateVertexRepresentatives(
        vertexToRepresentativeMap, retainedVerticesSubgraph.getVertices());

    // don't execute grouping on edges between retained vertices
    // but execute on edges between retained vertices and grouped vertices
    // graph.getEdges() - retainedVerticesSubgraph.getEdges()
    DataSet<E> edgesToGroup = subtractEdges(graph.getEdges(), retainedVerticesSubgraph.getEdges());

    DataSet<E> superEdges = buildSuperEdges(
        graph.getFactory().getEdgeFactory(), edgesToGroup, vertexToRepresentativeMap);

    // add the retained vertices and the edges between them to the result unchanged
    DataSet<V> resultVertices = superVertices.union(retainedVerticesSubgraph.getVertices());
    DataSet<E> resultEdges = superEdges.union(retainedVerticesSubgraph.getEdges());
    return graph.getFactory().fromDataSets(resultVertices, resultEdges);
}
Also used : Value0Of2(org.gradoop.flink.model.impl.functions.tuple.Value0Of2) VertexWithSuperVertex(org.gradoop.flink.model.impl.operators.grouping.tuples.VertexWithSuperVertex) IdWithIdSet(org.gradoop.flink.model.impl.tuples.IdWithIdSet) FilterRegularVertices(org.gradoop.flink.model.impl.operators.grouping.functions.FilterRegularVertices) CombineVertexGroupItems(org.gradoop.flink.model.impl.operators.grouping.functions.CombineVertexGroupItems) VertexGroupItem(org.gradoop.flink.model.impl.operators.grouping.tuples.VertexGroupItem) BuildVertexGroupItem(org.gradoop.flink.model.impl.operators.grouping.functions.BuildVertexGroupItem) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TransposeVertexGroupItems(org.gradoop.flink.model.impl.operators.grouping.functions.TransposeVertexGroupItems) BuildVertexWithSuperVertexBC(org.gradoop.flink.model.impl.operators.grouping.functions.BuildVertexWithSuperVertexBC) LabelGroupFilter(org.gradoop.flink.model.impl.operators.grouping.functions.LabelGroupFilter) FilterSuperVertices(org.gradoop.flink.model.impl.operators.grouping.functions.FilterSuperVertices)

Example 2 with Value0Of2

use of org.gradoop.flink.model.impl.functions.tuple.Value0Of2 in project gradoop by dbs-leipzig.

The class BusinessTransactionGraphs, method execute.

/**
 * Builds a graph collection of business transaction graphs (BTGs) from the given graph.
 *
 * <p>The vertices are split with the {@code MasterData} and {@code TransactionalData} filters.
 * The subgraph induced by the transactional vertices is converted to a Gelly graph, made
 * undirected and processed by a scatter-gather iteration ({@code BtgMessenger} /
 * {@code BtgUpdater}). The resulting vertex values are grouped into BTG ids, from which graph
 * heads are created and which are then set on edges, transactional vertices and master vertices.
 *
 * @param iig the input graph
 * @return collection built from the new graph heads, the updated vertices and the updated edges
 */
@Override
public GraphCollection execute(LogicalGraph iig) {
    // split the input into master data vertices and the transactional subgraph
    DataSet<EPGMVertex> masterData = iig.getVertices().filter(new MasterData<>());
    LogicalGraph transactionalGraph = iig.vertexInducedSubgraph(new TransactionalData<>());
    DataSet<EPGMVertex> transactionalData = transactionalGraph.getVertices();

    // convert the transactional subgraph into a Gelly graph
    DataSet<org.apache.flink.graph.Edge<GradoopId, NullValue>> gellyEdges =
        transactionalGraph.getEdges().map(new ToGellyEdgeWithNullValue());
    Graph<GradoopId, GradoopId, NullValue> gellyInput = Graph.fromDataSet(
        transactionalData.map(new VertexToGellyVertexWithGradoopId()),
        gellyEdges,
        iig.getConfig().getExecutionEnvironment());

    // run the scatter-gather iteration on the undirected graph (iteration limit: 100)
    Graph<GradoopId, GradoopId, NullValue> gellyResult = gellyInput
        .getUndirected()
        .runScatterGatherIteration(new BtgMessenger(), new BtgUpdater(), 100);

    // group vertex ids by their vertex value and assign a new BTG id per group
    DataSet<Tuple2<GradoopId, GradoopIdSet>> btgWithVertexIds = gellyResult
        .getVerticesAsTuple2()
        .map(new SwitchPair<>())
        .groupBy(0)
        .reduceGroup(new CollectGradoopIds())
        .map(new ComponentToNewBtgId());

    // expand the id sets into single pairs and switch the pair order
    DataSet<Tuple2<GradoopId, GradoopId>> vertexWithBtgId = btgWithVertexIds
        .flatMap(new ExpandGradoopIds<>())
        .map(new SwitchPair<>());

    // one graph head per BTG id
    DataSet<EPGMGraphHead> graphHeads = btgWithVertexIds
        .map(new Value0Of2<>())
        .map(new NewBtgGraphHead<>(iig.getFactory().getGraphHeadFactory()));

    // filter and update edges
    DataSet<EPGMEdge> btgEdges = iig.getEdges()
        .join(vertexWithBtgId)
        .where(new SourceId<>()).equalTo(0)
        .with(new SetBtgId<>());

    // update transactional vertices
    DataSet<EPGMVertex> updatedTransactionalData = transactionalData
        .join(vertexWithBtgId)
        .where(new Id<>()).equalTo(0)
        .with(new SetBtgId<>());

    // create master data BTG map
    DataSet<Tuple2<GradoopId, GradoopId>> masterWithBtgId = btgEdges
        .map(new TargetIdBtgId<>())
        .join(masterData)
        .where(0).equalTo(new Id<>())
        .with(new LeftSide<>())
        .distinct();
    DataSet<Tuple2<GradoopId, GradoopIdSet>> masterWithBtgIds = masterWithBtgId
        .groupBy(0)
        .reduceGroup(new CollectGradoopIds());

    // update master data vertices
    DataSet<EPGMVertex> updatedMasterData = masterData
        .join(masterWithBtgIds)
        .where(new Id<>()).equalTo(0)
        .with(new SetBtgIds<>());

    return iig.getCollectionFactory()
        .fromDataSets(graphHeads, updatedTransactionalData.union(updatedMasterData), btgEdges);
}
Also used : Value0Of2(org.gradoop.flink.model.impl.functions.tuple.Value0Of2) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) BtgMessenger(org.gradoop.flink.algorithms.btgs.functions.BtgMessenger) SetBtgIds(org.gradoop.flink.algorithms.btgs.functions.SetBtgIds) SetBtgId(org.gradoop.flink.algorithms.btgs.functions.SetBtgId) SwitchPair(org.gradoop.flink.model.impl.functions.tuple.SwitchPair) ToGellyEdgeWithNullValue(org.gradoop.flink.model.impl.functions.epgm.ToGellyEdgeWithNullValue) NullValue(org.apache.flink.types.NullValue) LogicalGraph(org.gradoop.flink.model.impl.epgm.LogicalGraph) ComponentToNewBtgId(org.gradoop.flink.algorithms.btgs.functions.ComponentToNewBtgId) VertexToGellyVertexWithGradoopId(org.gradoop.flink.algorithms.gelly.functions.VertexToGellyVertexWithGradoopId) BtgUpdater(org.gradoop.flink.algorithms.btgs.functions.BtgUpdater) ExpandGradoopIds(org.gradoop.flink.model.impl.functions.epgm.ExpandGradoopIds) ToGellyEdgeWithNullValue(org.gradoop.flink.model.impl.functions.epgm.ToGellyEdgeWithNullValue) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) CollectGradoopIds(org.gradoop.flink.algorithms.btgs.functions.CollectGradoopIds) GradoopId(org.gradoop.common.model.impl.id.GradoopId) VertexToGellyVertexWithGradoopId(org.gradoop.flink.algorithms.gelly.functions.VertexToGellyVertexWithGradoopId) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ComponentToNewBtgId(org.gradoop.flink.algorithms.btgs.functions.ComponentToNewBtgId) TargetIdBtgId(org.gradoop.flink.algorithms.btgs.functions.TargetIdBtgId) SetBtgId(org.gradoop.flink.algorithms.btgs.functions.SetBtgId) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) Id(org.gradoop.flink.model.impl.functions.epgm.Id) GradoopId(org.gradoop.common.model.impl.id.GradoopId) VertexToGellyVertexWithGradoopId(org.gradoop.flink.algorithms.gelly.functions.VertexToGellyVertexWithGradoopId) 
EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge)

Example 3 with Value0Of2

use of org.gradoop.flink.model.impl.functions.tuple.Value0Of2 in project gradoop by dbs-leipzig.

The class ExtractPropertyFromVertex, method execute.

/**
 * Extracts a property from vertices with a given label into newly created vertices.
 *
 * <p>Vertices matching {@code forVerticesOfLabel} are passed to
 * {@code ExtractPropertyWithOriginId}. New vertices are created from the resulting
 * (property value, origin id) pairs, either one per pair or, if {@code condense} is set, one per
 * group of pairs sharing the same property value. The new vertices are added to the graph via
 * {@code AddToGraphBroadcast} and united with the existing vertices. Unless
 * {@code edgeDirection} is {@code EdgeDirection.NONE}, new edges created by
 * {@code CreateNewEdges} are added in the same way.
 *
 * @param logicalGraph the input graph
 * @return graph with the input graph head, the extended vertex set and the (possibly extended)
 *         edge set
 */
@Override
public LogicalGraph execute(LogicalGraph logicalGraph) {
    // restrict to vertices with the configured label
    DataSet<EPGMVertex> labelMatches = logicalGraph.getVertices()
        .filter(new ByLabel<>(forVerticesOfLabel));

    // property values paired with the id of the vertex they originate from
    DataSet<Tuple2<PropertyValue, GradoopId>> valuesWithOrigin = labelMatches
        .flatMap(new ExtractPropertyWithOriginId(originalPropertyName));

    // create the new vertices, remembering the origin ids for linking
    DataSet<Tuple2<EPGMVertex, List<GradoopId>>> createdWithOrigins;
    if (!condense) {
        createdWithOrigins = valuesWithOrigin.map(
            new CreateNewVertex(
                logicalGraph.getFactory().getVertexFactory(), newVertexLabel, newPropertyName));
    } else {
        createdWithOrigins = valuesWithOrigin.groupBy(0).reduceGroup(
            new CreateNewVertexWithEqualityCondense(
                logicalGraph.getFactory().getVertexFactory(), newVertexLabel, newPropertyName));
    }

    // add the new vertices to the graph and unite them with the existing ones
    DataSet<EPGMVertex> allVertices = createdWithOrigins
        .map(new Value0Of2<>())
        .map(new AddToGraphBroadcast<>())
        .withBroadcastSet(logicalGraph.getGraphHead().map(new Id<>()), AddToGraphBroadcast.GRAPH_ID)
        .union(logicalGraph.getVertices());

    DataSet<EPGMEdge> existingEdges = logicalGraph.getEdges();

    // no linking requested: the edge set stays untouched
    if (edgeDirection.equals(EdgeDirection.NONE)) {
        return logicalGraph.getFactory()
            .fromDataSets(logicalGraph.getGraphHead(), allVertices, existingEdges);
    }

    // the newly created vertices should be linked to the original vertices
    DataSet<EPGMEdge> allEdges = createdWithOrigins
        .flatMap(new CreateNewEdges(logicalGraph.getFactory().getEdgeFactory(), edgeDirection, edgeLabel))
        .map(new AddToGraphBroadcast<>())
        .withBroadcastSet(logicalGraph.getGraphHead().map(new Id<>()), AddToGraphBroadcast.GRAPH_ID)
        .union(existingEdges);

    return logicalGraph.getFactory()
        .fromDataSets(logicalGraph.getGraphHead(), allVertices, allEdges);
}
Also used : CreateNewVertex(org.gradoop.dataintegration.transformation.impl.functions.CreateNewVertex) Value0Of2(org.gradoop.flink.model.impl.functions.tuple.Value0Of2) CreateNewVertexWithEqualityCondense(org.gradoop.dataintegration.transformation.impl.functions.CreateNewVertexWithEqualityCondense) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) GradoopId(org.gradoop.common.model.impl.id.GradoopId) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) AddToGraphBroadcast(org.gradoop.flink.model.impl.functions.graphcontainment.AddToGraphBroadcast) CreateNewEdges(org.gradoop.dataintegration.transformation.impl.functions.CreateNewEdges) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ExtractPropertyWithOriginId(org.gradoop.dataintegration.transformation.impl.functions.ExtractPropertyWithOriginId) Id(org.gradoop.flink.model.impl.functions.epgm.Id) GradoopId(org.gradoop.common.model.impl.id.GradoopId) ExtractPropertyWithOriginId(org.gradoop.dataintegration.transformation.impl.functions.ExtractPropertyWithOriginId)

Example 4 with Value0Of2

use of org.gradoop.flink.model.impl.functions.tuple.Value0Of2 in project gradoop by dbs-leipzig.

The class KRandomJumpGellyVCI, method executeInGelly.

/**
 * Runs the random jump as a Gelly vertex-centric iteration and builds the result graph from it.
 *
 * <p>Random start indices are drawn and broadcast to {@code VCIComputeFunction} together with
 * the vertex indices; the number of vertices to visit is derived from the vertex count and
 * {@code percentageToVisit}. After the iteration, the visited Gelly edges and vertices are
 * joined back to the edges and vertices of {@code currentGraph} using
 * {@code SamplingConstants.PROPERTY_KEY_SAMPLED}.
 *
 * <p>The number of start indices drawn is capped at the number of distinct indices the random
 * draw can produce, so the selection loop always terminates, even when
 * {@code numberOfStartVertices} is larger than that.
 *
 * @param gellyGraph the Gelly graph to run the iteration on
 * @return graph built from the graph head of {@code currentGraph}, the joined vertices and the
 *         joined edges
 * @throws Exception propagated from the Gelly / Flink calls made in this method
 */
@Override
public LG executeInGelly(Graph<Long, VCIVertexValue, Long> gellyGraph) throws Exception {
    long vertexCount = gellyGraph.numberOfVertices();
    // --------------------------------------------------------------------------
    // pre compute
    // --------------------------------------------------------------------------
    // define start vertices
    // (long) (Math.random() * (vertexCount - 1)) yields only vertexCount - 1 distinct values
    // (just the value 0 for graphs with fewer than two vertices). Asking the set to grow beyond
    // that would loop forever, so the requested number is capped at what is attainable.
    // NOTE(review): with this bound the largest index drawn is vertexCount - 2; confirm whether
    // excluding the last index is intended.
    long attainableStartIndices = Math.max(1L, vertexCount - 1L);
    long startIndexTarget = Math.min(numberOfStartVertices, attainableStartIndices);
    Set<Long> randomStartIndices = new HashSet<>();
    while (randomStartIndices.size() < startIndexTarget) {
        long randomLongInBounds = (long) (Math.random() * (vertexCount - 1L));
        randomStartIndices.add(randomLongInBounds);
    }
    DataSet<Long> startIndices = currentGraph.getConfig().getExecutionEnvironment().fromCollection(randomStartIndices);
    // define how many vertices to visit
    long verticesToVisit = (long) Math.ceil((double) vertexCount * percentageToVisit);
    // set compute parameters
    VertexCentricConfiguration parameters = new VertexCentricConfiguration();
    parameters.addBroadcastSet(VCIComputeFunction.START_INDICES_BROADCAST_SET, startIndices);
    parameters.addBroadcastSet(VCIComputeFunction.VERTEX_INDICES_BROADCAST_SET, indexToVertexIdMap.map(new Value0Of2<>()));
    parameters.registerAggregator(VCIComputeFunction.VISITED_VERTICES_AGGREGATOR_NAME, new LongSumAggregator());
    // run gelly
    Graph<Long, VCIVertexValue, Long> resultGraph = gellyGraph.runVertexCentricIteration(new VCIComputeFunction(jumpProbability, verticesToVisit), null, maxIterations, parameters);
    // --------------------------------------------------------------------------
    // post compute
    // --------------------------------------------------------------------------
    // translate the visited Gelly edge ids (long) back to Gradoop ids
    DataSet<GradoopId> visitedGellyEdgeIds = resultGraph.getVertices().flatMap(new GetVisitedGellyEdgeLongIdsFlatMap()).join(indexToEdgeIdMap).where("*").equalTo(0).with(new VisitedGellyEdgesWithLongIdToGradoopIdJoin());
    // compute new visited edges
    DataSet<E> visitedEdges = currentGraph.getEdges().leftOuterJoin(visitedGellyEdgeIds).where(new Id<>()).equalTo("*").with(new EdgeWithGellyEdgeIdJoin<>(SamplingConstants.PROPERTY_KEY_SAMPLED));
    // distinct ids emitted by GetVisitedSourceTargetIdsFlatMap for the joined edges
    DataSet<GradoopId> visitedSourceTargetIds = visitedEdges.flatMap(new GetVisitedSourceTargetIdsFlatMap<>(SamplingConstants.PROPERTY_KEY_SAMPLED)).distinct();
    // compute new visited vertices
    DataSet<V> visitedVertices = resultGraph.getVertices().join(indexToVertexIdMap).where(0).equalTo(0).with(new GellyVertexWithLongIdToGradoopIdJoin()).join(currentGraph.getVertices()).where(0).equalTo(new Id<>()).with(new GellyVertexWithVertexJoin<>(SamplingConstants.PROPERTY_KEY_SAMPLED));
    visitedVertices = visitedVertices.leftOuterJoin(visitedSourceTargetIds).where(new Id<>()).equalTo("*").with(new VertexWithVisitedSourceTargetIdJoin<>(SamplingConstants.PROPERTY_KEY_SAMPLED));
    // return graph
    return currentGraph.getFactory().fromDataSets(currentGraph.getGraphHead(), visitedVertices, visitedEdges);
}
Also used : Value0Of2(org.gradoop.flink.model.impl.functions.tuple.Value0Of2) GellyVertexWithLongIdToGradoopIdJoin(org.gradoop.flink.algorithms.gelly.randomjump.functions.GellyVertexWithLongIdToGradoopIdJoin) LongSumAggregator(org.apache.flink.api.common.aggregators.LongSumAggregator) VCIComputeFunction(org.gradoop.flink.algorithms.gelly.randomjump.functions.VCIComputeFunction) VertexWithVisitedSourceTargetIdJoin(org.gradoop.flink.algorithms.gelly.randomjump.functions.VertexWithVisitedSourceTargetIdJoin) GetVisitedSourceTargetIdsFlatMap(org.gradoop.flink.algorithms.gelly.randomjump.functions.GetVisitedSourceTargetIdsFlatMap) VCIVertexValue(org.gradoop.flink.algorithms.gelly.randomjump.functions.VCIVertexValue) GetVisitedGellyEdgeLongIdsFlatMap(org.gradoop.flink.algorithms.gelly.randomjump.functions.GetVisitedGellyEdgeLongIdsFlatMap) VisitedGellyEdgesWithLongIdToGradoopIdJoin(org.gradoop.flink.algorithms.gelly.randomjump.functions.VisitedGellyEdgesWithLongIdToGradoopIdJoin) HashSet(java.util.HashSet) VertexCentricConfiguration(org.apache.flink.graph.pregel.VertexCentricConfiguration) GradoopId(org.gradoop.common.model.impl.id.GradoopId) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) Id(org.gradoop.flink.model.impl.functions.epgm.Id) GradoopId(org.gradoop.common.model.impl.id.GradoopId)

Aggregations

Value0Of2 (org.gradoop.flink.model.impl.functions.tuple.Value0Of2): 4, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 3, GradoopId (org.gradoop.common.model.impl.id.GradoopId): 3, Id (org.gradoop.flink.model.impl.functions.epgm.Id): 3, EPGMEdge (org.gradoop.common.model.impl.pojo.EPGMEdge): 2, EPGMVertex (org.gradoop.common.model.impl.pojo.EPGMVertex): 2, SourceId (org.gradoop.flink.model.impl.functions.epgm.SourceId): 2, HashSet (java.util.HashSet): 1, LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator): 1, VertexCentricConfiguration (org.apache.flink.graph.pregel.VertexCentricConfiguration): 1, NullValue (org.apache.flink.types.NullValue): 1, EPGMGraphHead (org.gradoop.common.model.impl.pojo.EPGMGraphHead): 1, CreateNewEdges (org.gradoop.dataintegration.transformation.impl.functions.CreateNewEdges): 1, CreateNewVertex (org.gradoop.dataintegration.transformation.impl.functions.CreateNewVertex): 1, CreateNewVertexWithEqualityCondense (org.gradoop.dataintegration.transformation.impl.functions.CreateNewVertexWithEqualityCondense): 1, ExtractPropertyWithOriginId (org.gradoop.dataintegration.transformation.impl.functions.ExtractPropertyWithOriginId): 1, BtgMessenger (org.gradoop.flink.algorithms.btgs.functions.BtgMessenger): 1, BtgUpdater (org.gradoop.flink.algorithms.btgs.functions.BtgUpdater): 1, CollectGradoopIds (org.gradoop.flink.algorithms.btgs.functions.CollectGradoopIds): 1, ComponentToNewBtgId (org.gradoop.flink.algorithms.btgs.functions.ComponentToNewBtgId): 1