Use of org.gradoop.flink.model.impl.functions.tuple.Value0Of2 in project gradoop by dbs-leipzig.
Class GroupingGroupCombine, method groupInternal:
@Override
protected LG groupInternal(LG graph) {
  DataSet<V> vertices = isRetainingVerticesWithoutGroup() ?
    graph.getVertices().filter(new LabelGroupFilter<>(getVertexLabelGroups(), useVertexLabels())) :
    graph.getVertices();

  // map vertex to vertex group item
  DataSet<VertexGroupItem> verticesForGrouping = vertices
    .flatMap(new BuildVertexGroupItem<>(useVertexLabels(), getVertexLabelGroups()));

  // group vertices by label / properties / both and combine them per partition
  DataSet<VertexGroupItem> combinedVertexGroupItems = groupVertices(verticesForGrouping)
    .combineGroup(new CombineVertexGroupItems(useVertexLabels()));

  // filter super vertex tuples (1..n per partition/group),
  // group the super vertex tuples and
  // create one super vertex tuple per group + the previous super vertex ids
  DataSet<Tuple2<VertexGroupItem, IdWithIdSet>> superVertexTuples =
    groupVertices(combinedVertexGroupItems.filter(new FilterSuperVertices()))
      .reduceGroup(new TransposeVertexGroupItems(useVertexLabels()));

  // build super vertices from super vertex tuples
  DataSet<V> superVertices = superVertexTuples
    .map(new Value0Of2<>())
    .map(new BuildSuperVertex<>(useVertexLabels(), graph.getFactory().getVertexFactory()));

  // extract mapping
  DataSet<IdWithIdSet> mapping = superVertexTuples.map(new Value1Of2<>());

  // filter non-candidates from the combiner output and
  // update their vertex representative according to the mapping
  DataSet<VertexWithSuperVertex> vertexToRepresentativeMap = combinedVertexGroupItems
    .filter(new FilterRegularVertices())
    .map(new BuildVertexWithSuperVertexBC())
    .withBroadcastSet(mapping, BuildVertexWithSuperVertexBC.BC_MAPPING);

  DataSet<E> edgesToGroup = graph.getEdges();

  if (isRetainingVerticesWithoutGroup()) {
    LG retainedVerticesSubgraph = getSubgraphOfRetainedVertices(graph);

    // to support grouped edges between retained vertices and super vertices,
    // retained vertices act as their own group representatives
    vertexToRepresentativeMap = updateVertexRepresentatives(
      vertexToRepresentativeMap, retainedVerticesSubgraph.getVertices());

    // don't execute grouping on edges between retained vertices,
    // but do execute it on edges between retained and grouped vertices:
    // graph.getEdges() - retainedVerticesSubgraph.getEdges()
    edgesToGroup = subtractEdges(graph.getEdges(), retainedVerticesSubgraph.getEdges());
  }

  DataSet<E> superEdges =
    buildSuperEdges(graph.getFactory().getEdgeFactory(), edgesToGroup, vertexToRepresentativeMap);

  if (isRetainingVerticesWithoutGroup()) {
    LG retainedVerticesSubgraph = getSubgraphOfRetainedVertices(graph);
    superVertices = superVertices.union(retainedVerticesSubgraph.getVertices());
    superEdges = superEdges.union(retainedVerticesSubgraph.getEdges());
  }

  return graph.getFactory().fromDataSets(superVertices, superEdges);
}
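In all four snippets on this page, Value0Of2 plays the same role: it projects a DataSet<Tuple2<T0, T1>> down to the first tuple field. A minimal sketch of such a projection function follows; the ForwardedFields annotation is an assumption (it would tell Flink's optimizer that f0 passes through unchanged, so partitioning on that field can be preserved):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.FunctionAnnotation;
import org.apache.flink.api.java.tuple.Tuple2;

// Projects a Tuple2 to its first field. Declaring f0 as forwarded (an
// assumption here) lets Flink keep partitioning/sorting properties across this map.
@FunctionAnnotation.ForwardedFields("f0->*")
public class Value0Of2<T0, T1> implements MapFunction<Tuple2<T0, T1>, T0> {

  @Override
  public T0 map(Tuple2<T0, T1> tuple) throws Exception {
    return tuple.f0;
  }
}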
Use of org.gradoop.flink.model.impl.functions.tuple.Value0Of2 in project gradoop by dbs-leipzig.
Class BusinessTransactionGraphs, method execute:
@Override
public GraphCollection execute(LogicalGraph iig) {
  DataSet<EPGMVertex> masterVertices = iig.getVertices().filter(new MasterData<>());
  LogicalGraph transGraph = iig.vertexInducedSubgraph(new TransactionalData<>());
  DataSet<EPGMVertex> transVertices = transGraph.getVertices();
  DataSet<org.apache.flink.graph.Edge<GradoopId, NullValue>> transEdges =
    transGraph.getEdges().map(new ToGellyEdgeWithNullValue());

  Graph<GradoopId, GradoopId, NullValue> gellyTransGraph = Graph.fromDataSet(
    transVertices.map(new VertexToGellyVertexWithGradoopId()), transEdges,
    iig.getConfig().getExecutionEnvironment());
  gellyTransGraph = gellyTransGraph.getUndirected()
    .runScatterGatherIteration(new BtgMessenger(), new BtgUpdater(), 100);

  DataSet<Tuple2<GradoopId, GradoopIdSet>> btgVerticesMap = gellyTransGraph
    .getVerticesAsTuple2().map(new SwitchPair<>())
    .groupBy(0).reduceGroup(new CollectGradoopIds())
    .map(new ComponentToNewBtgId());
  DataSet<Tuple2<GradoopId, GradoopId>> vertexBtgMap = btgVerticesMap
    .flatMap(new ExpandGradoopIds<>()).map(new SwitchPair<>());
  DataSet<EPGMGraphHead> graphHeads = btgVerticesMap.map(new Value0Of2<>())
    .map(new NewBtgGraphHead<>(iig.getFactory().getGraphHeadFactory()));

  // filter and update edges
  DataSet<EPGMEdge> btgEdges = iig.getEdges()
    .join(vertexBtgMap).where(new SourceId<>()).equalTo(0).with(new SetBtgId<>());

  // update transactional vertices
  transVertices = transVertices
    .join(vertexBtgMap).where(new Id<>()).equalTo(0).with(new SetBtgId<>());

  // create master data BTG map
  vertexBtgMap = btgEdges.map(new TargetIdBtgId<>())
    .join(masterVertices).where(0).equalTo(new Id<>()).with(new LeftSide<>()).distinct();
  DataSet<Tuple2<GradoopId, GradoopIdSet>> vertexBtgsMap =
    vertexBtgMap.groupBy(0).reduceGroup(new CollectGradoopIds());
  masterVertices = masterVertices
    .join(vertexBtgsMap).where(new Id<>()).equalTo(0).with(new SetBtgIds<>());

  return iig.getCollectionFactory()
    .fromDataSets(graphHeads, transVertices.union(masterVertices), btgEdges);
}
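Isolated from the surrounding pipeline, the map(new Value0Of2<>()) step that feeds NewBtgGraphHead above amounts to the following self-contained sketch (hypothetical data; Flink DataSet API):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class Value0Of2Example {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // hypothetical (id, id-set) pairs standing in for (GradoopId, GradoopIdSet)
    DataSet<Tuple2<Long, String>> pairs = env.fromElements(
      Tuple2.of(1L, "a,b"), Tuple2.of(2L, "c"), Tuple2.of(3L, "d,e,f"));

    // keep only the first field of each tuple
    DataSet<Long> firstFields = pairs.map(new Value0Of2<>());

    firstFields.print(); // prints 1, 2, 3 (order not guaranteed)
  }
}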
Use of org.gradoop.flink.model.impl.functions.tuple.Value0Of2 in project gradoop by dbs-leipzig.
Class ExtractPropertyFromVertex, method execute:
@Override
public LogicalGraph execute(LogicalGraph logicalGraph) {
  // filter the vertices by the given label
  DataSet<EPGMVertex> filteredVertices = logicalGraph.getVertices()
    .filter(new ByLabel<>(forVerticesOfLabel));

  // calculate new vertices and store the origin for linking
  DataSet<Tuple2<PropertyValue, GradoopId>> candidates = filteredVertices
    .flatMap(new ExtractPropertyWithOriginId(originalPropertyName));

  // extract the new vertices
  DataSet<Tuple2<EPGMVertex, List<GradoopId>>> newVerticesAndOriginIds;
  if (condense) {
    newVerticesAndOriginIds = candidates.groupBy(0).reduceGroup(
      new CreateNewVertexWithEqualityCondense(
        logicalGraph.getFactory().getVertexFactory(), newVertexLabel, newPropertyName));
  } else {
    newVerticesAndOriginIds = candidates.map(new CreateNewVertex(
      logicalGraph.getFactory().getVertexFactory(), newVertexLabel, newPropertyName));
  }

  DataSet<EPGMVertex> vertices = newVerticesAndOriginIds
    .map(new Value0Of2<>())
    .map(new AddToGraphBroadcast<>())
    .withBroadcastSet(logicalGraph.getGraphHead().map(new Id<>()), AddToGraphBroadcast.GRAPH_ID)
    .union(logicalGraph.getVertices());

  // the newly created vertices should be linked to the original vertices
  DataSet<EPGMEdge> edges = logicalGraph.getEdges();
  if (!edgeDirection.equals(EdgeDirection.NONE)) {
    edges = newVerticesAndOriginIds
      .flatMap(new CreateNewEdges(
        logicalGraph.getFactory().getEdgeFactory(), edgeDirection, edgeLabel))
      .map(new AddToGraphBroadcast<>())
      .withBroadcastSet(logicalGraph.getGraphHead().map(new Id<>()), AddToGraphBroadcast.GRAPH_ID)
      .union(edges);
  }

  return logicalGraph.getFactory().fromDataSets(logicalGraph.getGraphHead(), vertices, edges);
}
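Both here and in the first snippet, the Value0Of2 projection is followed by a function that receives a broadcast DataSet via withBroadcastSet. The receiving side of that pattern looks roughly like this sketch (class and variable names are illustrative, not gradoop's actual AddToGraphBroadcast):

import java.util.List;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;

// Illustrative sketch: tags each element with a value read from a broadcast set.
public class TagWithBroadcastId extends RichMapFunction<String, String> {

  public static final String BC_ID = "broadcastId";

  private String id;

  @Override
  public void open(Configuration parameters) {
    // each parallel instance materializes the broadcast DataSet locally
    List<String> broadcast = getRuntimeContext().getBroadcastVariable(BC_ID);
    id = broadcast.get(0);
  }

  @Override
  public String map(String value) {
    return value + "@" + id;
  }
}

// usage: elements.map(new TagWithBroadcastId()).withBroadcastSet(ids, TagWithBroadcastId.BC_ID)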
Use of org.gradoop.flink.model.impl.functions.tuple.Value0Of2 in project gradoop by dbs-leipzig.
Class KRandomJumpGellyVCI, method executeInGelly:
@Override
public LG executeInGelly(Graph<Long, VCIVertexValue, Long> gellyGraph) throws Exception {
  long vertexCount = gellyGraph.numberOfVertices();

  // --------------------------------------------------------------------------
  // pre compute
  // --------------------------------------------------------------------------
  // define start vertices
  Set<Long> randomStartIndices = new HashSet<>();
  while (randomStartIndices.size() < numberOfStartVertices) {
    long randomLongInBounds = (long) (Math.random() * (vertexCount - 1L));
    randomStartIndices.add(randomLongInBounds);
  }
  DataSet<Long> startIndices = currentGraph.getConfig().getExecutionEnvironment()
    .fromCollection(randomStartIndices);

  // define how many vertices to visit
  long verticesToVisit = (long) Math.ceil((double) vertexCount * percentageToVisit);

  // set compute parameters
  VertexCentricConfiguration parameters = new VertexCentricConfiguration();
  parameters.addBroadcastSet(VCIComputeFunction.START_INDICES_BROADCAST_SET, startIndices);
  parameters.addBroadcastSet(VCIComputeFunction.VERTEX_INDICES_BROADCAST_SET,
    indexToVertexIdMap.map(new Value0Of2<>()));
  parameters.registerAggregator(VCIComputeFunction.VISITED_VERTICES_AGGREGATOR_NAME,
    new LongSumAggregator());

  // run gelly
  Graph<Long, VCIVertexValue, Long> resultGraph = gellyGraph.runVertexCentricIteration(
    new VCIComputeFunction(jumpProbability, verticesToVisit), null, maxIterations, parameters);

  // --------------------------------------------------------------------------
  // post compute
  // --------------------------------------------------------------------------
  DataSet<GradoopId> visitedGellyEdgeIds = resultGraph.getVertices()
    .flatMap(new GetVisitedGellyEdgeLongIdsFlatMap())
    .join(indexToEdgeIdMap).where("*").equalTo(0)
    .with(new VisitedGellyEdgesWithLongIdToGradoopIdJoin());

  // compute new visited edges
  DataSet<E> visitedEdges = currentGraph.getEdges()
    .leftOuterJoin(visitedGellyEdgeIds).where(new Id<>()).equalTo("*")
    .with(new EdgeWithGellyEdgeIdJoin<>(SamplingConstants.PROPERTY_KEY_SAMPLED));
  DataSet<GradoopId> visitedSourceTargetIds = visitedEdges
    .flatMap(new GetVisitedSourceTargetIdsFlatMap<>(SamplingConstants.PROPERTY_KEY_SAMPLED))
    .distinct();

  // compute new visited vertices
  DataSet<V> visitedVertices = resultGraph.getVertices()
    .join(indexToVertexIdMap).where(0).equalTo(0)
    .with(new GellyVertexWithLongIdToGradoopIdJoin())
    .join(currentGraph.getVertices()).where(0).equalTo(new Id<>())
    .with(new GellyVertexWithVertexJoin<>(SamplingConstants.PROPERTY_KEY_SAMPLED));
  visitedVertices = visitedVertices
    .leftOuterJoin(visitedSourceTargetIds).where(new Id<>()).equalTo("*")
    .with(new VertexWithVisitedSourceTargetIdJoin<>(SamplingConstants.PROPERTY_KEY_SAMPLED));

  // return graph
  return currentGraph.getFactory()
    .fromDataSets(currentGraph.getGraphHead(), visitedVertices, visitedEdges);
}
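Value1Of2, which the first snippet uses alongside Value0Of2 to extract the mapping side of the super vertex tuples, presumably mirrors Value0Of2 for the second field; a sketch under that assumption:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.FunctionAnnotation;
import org.apache.flink.api.java.tuple.Tuple2;

// Counterpart to Value0Of2: projects a Tuple2 to its second field.
@FunctionAnnotation.ForwardedFields("f1->*")
public class Value1Of2<T0, T1> implements MapFunction<Tuple2<T0, T1>, T1> {

  @Override
  public T1 map(Tuple2<T0, T1> tuple) throws Exception {
    return tuple.f1;
  }
}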