use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.
the class ValueWeaklyConnectedComponents method transformToGelly.
/**
 * Converts a {@link BaseGraph} into a Gelly graph that uses {@code Long} values as vertex ids
 * and vertex values and {@link NullValue} as edge value.
 *
 * @param graph Gradoop graph to convert.
 * @return Gelly graph built from the converted vertices and edges.
 */
@Override
public Graph<Long, Long, NullValue> transformToGelly(LG graph) {
  // Pair every vertex id (field 1) with a unique long value (field 0).
  DataSet<Tuple2<Long, GradoopId>> longIdMapping =
    DataSetUtils.zipWithUniqueId(graph.getVertices().map(new Id<>()));

  DataSet<org.apache.flink.graph.Vertex<Long, Long>> gellyVertices =
    longIdMapping.map(new LongTupleToGellyVertexWithLongValue());

  // The mapping is joined against the edges twice: first on the edge's source id, then on
  // field 3 of the intermediate tuple (presumably the target id - confirm against
  // CreateLongSourceIds).
  DataSet<org.apache.flink.graph.Edge<Long, NullValue>> gellyEdges = longIdMapping
    .join(graph.getEdges())
    .where(1).equalTo(new SourceId<>())
    .with(new CreateLongSourceIds<>())
    .join(longIdMapping)
    .where(3).equalTo(1)
    .with(new CreateLongTargetIds())
    .map(new LongTupleToGellyEdgeWithLongValue());

  return Graph.fromDataSet(
    gellyVertices, gellyEdges, graph.getConfig().getExecutionEnvironment());
}
use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.
the class VertexFusion method execute.
/**
 * Fuses the vertices described by the given patterns inside the search graph.
 * <p>
 * The result gets a graph head with a freshly generated id; every returned vertex and edge is
 * passed through {@code MapFunctionAddGraphElementToGraph2} with that id.
 *
 * @param searchGraph   Logical Graph defining the data lake
 * @param graphPatterns Collection of elements representing which vertices will be merged into
 *                      a vertex
 * @return A single merged graph
 */
public LogicalGraph execute(LogicalGraph searchGraph, GraphCollection graphPatterns) {
  // Not covered by the theoretical definition: the result needs a graph head of its own.
  GradoopId fusedGraphId = GradoopId.get();
  DataSet<EPGMGraphHead> fusedGraphHead = searchGraph.getGraphHead()
    .map(new MapGraphHeadForNewGraph(fusedGraphId));

  DataSet<GradoopId> patternVertexIds = graphPatterns.getVertices()
    .map(new Id<>());
  DataSet<GradoopId> patternEdgeIds = graphPatterns.getEdges()
    .map(new Id<>());

  // PHASE 1: Induced subgraphs
  // Pair each pattern vertex with its graph id.
  DataSet<Tuple2<EPGMVertex, GradoopId>> patternVerticesByGraph = graphPatterns.getVertices()
    .coGroup(searchGraph.getVertices())
    .where(new Id<>()).equalTo(new Id<>())
    .with(new LeftSide<>())
    .flatMap(new MapVertexToPairWithGraphId());

  // Pair each pattern graph id (hypervertices.H) with its merged vertex.
  DataSet<Tuple2<EPGMVertex, GradoopId>> fusedVerticesByGraph = graphPatterns.getGraphHeads()
    .map(new CoGroupGraphHeadToVertex());

  // PHASE 2: Recreating the vertices
  // Search graph vertices filtered against the broadcast pattern vertex ids.
  DataSet<EPGMVertex> untouchedVertices = searchGraph.getVertices()
    .filter(new IdNotInBroadcast<>())
    .withBroadcastSet(patternVertexIds, IdNotInBroadcast.IDS);

  DataSet<Tuple2<EPGMVertex, GradoopId>> oldVertexToNewId = patternVerticesByGraph
    .coGroup(fusedVerticesByGraph)
    .where(new Value1Of2<>()).equalTo(new Value1Of2<>())
    .with(new CoGroupAssociateOldVerticesWithNewIds())
    .union(untouchedVertices.map(new MapVerticesAsTuplesWithNullId()));

  DataSet<EPGMVertex> resultVertices = fusedVerticesByGraph
    .coGroup(patternVerticesByGraph)
    .where(new Value1Of2<>()).equalTo(new Value1Of2<>())
    .with(new LeftSide<>())
    .map(new Value0Of2<>())
    .union(untouchedVertices)
    .map(new MapFunctionAddGraphElementToGraph2<>(fusedGraphId));

  // PHASE 3: Recreating the edges
  // Search graph edges filtered against the broadcast pattern edge ids.
  DataSet<EPGMEdge> candidateEdges = searchGraph.getEdges()
    .filter(new IdNotInBroadcast<>())
    .withBroadcastSet(patternEdgeIds, IdNotInBroadcast.IDS);

  // Resolve the source side first (flag true), then the target side (flag false), and
  // finally regroup by edge id before attaching the edges to the new graph.
  DataSet<EPGMEdge> resultEdges = candidateEdges
    .leftOuterJoin(oldVertexToNewId)
    .where(new SourceId<>()).equalTo(new LeftElementId<>())
    .with(new FlatJoinSourceEdgeReference(true))
    .leftOuterJoin(oldVertexToNewId)
    .where(new TargetId<>()).equalTo(new LeftElementId<>())
    .with(new FlatJoinSourceEdgeReference(false))
    .groupBy(new Id<>())
    .reduceGroup(new AddNewIdToDuplicatedEdge())
    .map(new MapFunctionAddGraphElementToGraph2<>(fusedGraphId));

  return searchGraph.getFactory().fromDataSets(fusedGraphHead, resultVertices, resultEdges);
}
use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.
the class ApplyAggregation method executeForGVELayout.
/**
 * Applies the configured aggregate functions to a collection and writes the results to its
 * graph heads.
 * <p>
 * Vertex and edge aggregates are computed against the graph head ids, unioned, grouped by
 * tuple field 0 (the {@link GradoopId} of the declared tuple type) and combined. The combined
 * values are then co-grouped with the graph heads by id. Vertices and edges of the collection
 * are returned unchanged.
 *
 * @param collection input graph collection
 * @return collection with the updated graph heads and the original vertices and edges
 */
@Override
public GC executeForGVELayout(GC collection) {
  DataSet<G> inputHeads = collection.getGraphHeads();
  DataSet<GradoopId> headIds = inputHeads.map(new Id<>());

  DataSet<Tuple2<GradoopId, Map<String, PropertyValue>>> aggregatesPerGraph =
    aggregateVertices(collection.getVertices(), headIds)
      .union(aggregateEdges(collection.getEdges(), headIds))
      .groupBy(0)
      .reduceGroup(new CombinePartitionApplyAggregates(aggregateFunctions));

  DataSet<G> aggregatedHeads = inputHeads
    .coGroup(aggregatesPerGraph)
    .where(new Id<>()).equalTo(0)
    .with(new SetAggregateProperties<>(aggregateFunctions));

  return collection.getFactory()
    .fromDataSets(aggregatedHeads, collection.getVertices(), collection.getEdges());
}
use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.
the class TransactionalPatternMatching method findEmbeddings.
/**
 * Runs the matching algorithm on the candidate graphs and assembles the found embeddings
 * into a new graph collection.
 *
 * @param collection input graph collection
 * @param graphs graphs with candidates of their elements
 * @return collection of found embeddings
 */
private GC findEmbeddings(GC collection, DataSet<GraphWithCandidates> graphs) {
  // Step 1: run the matching algorithm.
  DataSet<Tuple4<GradoopId, GradoopId, GradoopIdSet, GradoopIdSet>> matches =
    graphs.flatMap(new FindEmbeddings(algorithm, query));

  // Step 2: create one new graph head per match, derived from tuple fields 0 and 1.
  DataSet<G> embeddingHeads = matches
    .map(new Project4To0And1<>())
    .map(new InitGraphHeadWithLineage<>(collection.getFactory().getGraphHeadFactory()));

  // Step 3: update the graph membership of the vertices (tuple fields 0 and 2).
  DataSet<Tuple2<GradoopId, GradoopIdSet>> graphsPerVertex = matches
    .map(new Project4To0And2AndSwitch<>())
    .flatMap(new ExpandFirstField<>())
    .groupBy(0)
    .reduceGroup(new MergeSecondField<>());

  DataSet<V> embeddingVertices = graphsPerVertex
    .join(collection.getVertices())
    .where(0).equalTo(new Id<>())
    .with(new AddGraphsToElements<>());

  // Step 4: update the graph membership of the edges (tuple fields 0 and 3).
  DataSet<Tuple2<GradoopId, GradoopIdSet>> graphsPerEdge = matches
    .map(new Project4To0And3AndSwitch<>())
    .flatMap(new ExpandFirstField<>())
    .groupBy(0)
    .reduceGroup(new MergeSecondField<>());

  DataSet<E> embeddingEdges = graphsPerEdge
    .join(collection.getEdges())
    .where(0).equalTo(new Id<>())
    .with(new AddGraphsToElements<>());

  return collection.getFactory()
    .fromDataSets(embeddingHeads, embeddingVertices, embeddingEdges);
}
use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.
the class RandomWalkSampling method sample.
/**
 * Samples the graph by running {@code KRandomJumpGellyVCI} and keeping the vertices that pass
 * the {@code ByProperty} filter on {@link SamplingConstants#PROPERTY_KEY_SAMPLED}, together
 * with the edges that pass the {@code EdgesWithSampledVerticesFilter} for
 * {@code Neighborhood.BOTH}.
 *
 * @param graph graph to sample from
 * @return graph created from the sampled vertices and edges
 */
@Override
protected LogicalGraph sample(LogicalGraph graph) {
  LogicalGraph walkResult = graph.callForGraph(new KRandomJumpGellyVCI<>(
    numberOfStartVertices, maxIteration, jumpProbability, sampleSize));

  DataSet<EPGMVertex> markedVertices = walkResult.getVertices()
    .filter(new ByProperty<>(SamplingConstants.PROPERTY_KEY_SAMPLED));

  // Join the original edges with the marked vertices on the source side, then on field 1 of
  // the intermediate tuple (presumably the target id - confirm against EdgeSourceVertexJoin),
  // and project back to the edge in field 0.
  DataSet<EPGMEdge> keptEdges = graph.getEdges()
    .join(markedVertices)
    .where(new SourceId<>()).equalTo(new Id<>())
    .with(new EdgeSourceVertexJoin(SamplingConstants.PROPERTY_KEY_SAMPLED))
    .join(markedVertices)
    .where(1).equalTo(new Id<>())
    .with(new EdgeTargetVertexJoin(SamplingConstants.PROPERTY_KEY_SAMPLED))
    .filter(new EdgesWithSampledVerticesFilter(Neighborhood.BOTH))
    .map(new Value0Of3<>());

  return graph.getFactory().fromDataSets(markedVertices, keptEdges);
}
Aggregations