Search in sources :

Example 1 with Id

use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.

In class ValueWeaklyConnectedComponents, method transformToGelly.

/**
 * Converts a Gradoop {@link BaseGraph} into a Gelly graph that is keyed by {@code Long} ids.
 *
 * <p>Every vertex id of the input graph is paired with a unique {@code Long} through
 * {@code DataSetUtils.zipWithUniqueId}. The edges are then joined against that pairing
 * twice: first on the edge source id, afterwards on field 3 of the intermediate tuple
 * (presumably the target id, judging by {@code CreateLongTargetIds} - verify).
 *
 * @param graph Gradoop Graph.
 * @return Gelly Graph.
 */
@Override
public Graph<Long, Long, NullValue> transformToGelly(LG graph) {
    // Step 1: attach a unique Long to each Gradoop vertex id.
    DataSet<GradoopId> gradoopVertexIds = graph.getVertices().map(new Id<>());
    DataSet<Tuple2<Long, GradoopId>> longIdPerVertex = DataSetUtils.zipWithUniqueId(gradoopVertexIds);

    // Step 2: build the Gelly vertices from the (Long, GradoopId) pairs.
    DataSet<org.apache.flink.graph.Vertex<Long, Long>> gellyVertices =
        longIdPerVertex.map(new LongTupleToGellyVertexWithLongValue());

    // Step 3: build the Gelly edges by resolving both edge endpoints through the id pairing.
    DataSet<org.apache.flink.graph.Edge<Long, NullValue>> gellyEdges = longIdPerVertex
        .join(graph.getEdges())
        .where(1)
        .equalTo(new SourceId<>())
        .with(new CreateLongSourceIds<>())
        .join(longIdPerVertex)
        .where(3)
        .equalTo(1)
        .with(new CreateLongTargetIds())
        .map(new LongTupleToGellyEdgeWithLongValue());

    return Graph.fromDataSet(
        gellyVertices,
        gellyEdges,
        graph.getConfig().getExecutionEnvironment());
}
Also used : Vertex(org.gradoop.common.model.api.entities.Vertex) LongTupleToGellyVertexWithLongValue(org.gradoop.flink.algorithms.gelly.functions.LongTupleToGellyVertexWithLongValue) LongTupleToGellyEdgeWithLongValue(org.gradoop.flink.algorithms.gelly.functions.LongTupleToGellyEdgeWithLongValue) Tuple2(org.apache.flink.api.java.tuple.Tuple2) CreateLongTargetIds(org.gradoop.flink.algorithms.gelly.connectedcomponents.functions.CreateLongTargetIds) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) Id(org.gradoop.flink.model.impl.functions.epgm.Id) MapVertexIdComponentId(org.gradoop.flink.algorithms.gelly.connectedcomponents.functions.MapVertexIdComponentId) GradoopId(org.gradoop.common.model.impl.id.GradoopId) Edge(org.gradoop.common.model.api.entities.Edge) CreateLongSourceIds(org.gradoop.flink.algorithms.gelly.connectedcomponents.functions.CreateLongSourceIds)

Example 2 with Id

use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.

In class VertexFusion, method execute.

/**
 * Fuses the vertices of {@code searchGraph} that are covered by the given patterns.
 *
 * <p>The result is assembled in three phases that mirror the statements below:
 * pattern vertices are associated with their graph ids, the vertex set is rebuilt
 * from the per-pattern vertices plus all vertices not contained in any pattern, and
 * finally the edges not contained in any pattern are rebuilt against the new vertex set.
 *
 * @param searchGraph   Logical Graph defining the data lake
 * @param graphPatterns Collection of elements representing which vertices will be merged into
 *                      a vertex
 * @return              A single merged graph
 */
public LogicalGraph execute(LogicalGraph searchGraph, GraphCollection graphPatterns) {
    // Not part of the theoretical definition: the result needs a fresh graph head.
    GradoopId fusedGraphId = GradoopId.get();
    DataSet<EPGMGraphHead> fusedGraphHead = searchGraph.getGraphHead()
        .map(new MapGraphHeadForNewGraph(fusedGraphId));

    // Ids of all pattern elements; used further down as broadcast sets for filtering.
    DataSet<GradoopId> idsOfPatternVertices = graphPatterns.getVertices().map(new Id<>());
    DataSet<GradoopId> idsOfPatternEdges = graphPatterns.getEdges().map(new Id<>());

    // ---- Phase 1: induced subgraphs ----
    // Pair each pattern vertex with its graph id.
    DataSet<Tuple2<EPGMVertex, GradoopId>> patternVertexWithGraphId = graphPatterns.getVertices()
        .coGroup(searchGraph.getVertices())
        .where(new Id<>())
        .equalTo(new Id<>())
        .with(new LeftSide<>())
        .flatMap(new MapVertexToPairWithGraphId());

    // Pair each pattern graph id (hypervertices.H) with its merged vertex.
    DataSet<Tuple2<EPGMVertex, GradoopId>> fusedVertexWithGraphId = graphPatterns.getGraphHeads()
        .map(new CoGroupGraphHeadToVertex());

    // ---- Phase 2: rebuilding the vertices ----
    // Vertices of the search graph that do not occur in any pattern.
    DataSet<EPGMVertex> untouchedVertices = searchGraph.getVertices()
        .filter(new IdNotInBroadcast<>())
        .withBroadcastSet(idsOfPatternVertices, IdNotInBroadcast.IDS);

    // Association between old vertices and new ids, extended by the untouched vertices.
    DataSet<Tuple2<EPGMVertex, GradoopId>> oldVertexWithNewId = patternVertexWithGraphId
        .coGroup(fusedVertexWithGraphId)
        .where(new Value1Of2<>())
        .equalTo(new Value1Of2<>())
        .with(new CoGroupAssociateOldVerticesWithNewIds())
        .union(untouchedVertices.map(new MapVerticesAsTuplesWithNullId()));

    // Final vertex set, with every element added to the new graph.
    DataSet<EPGMVertex> resultVertices = fusedVertexWithGraphId
        .coGroup(patternVertexWithGraphId)
        .where(new Value1Of2<>())
        .equalTo(new Value1Of2<>())
        .with(new LeftSide<>())
        .map(new Value0Of2<>())
        .union(untouchedVertices)
        .map(new MapFunctionAddGraphElementToGraph2<>(fusedGraphId));

    // ---- Phase 3: rebuilding the edges ----
    // Non-pattern edges are joined with the vertex association once per endpoint
    // (source first, then target), grouped by id and added to the new graph.
    DataSet<EPGMEdge> resultEdges = searchGraph.getEdges()
        .filter(new IdNotInBroadcast<>())
        .withBroadcastSet(idsOfPatternEdges, IdNotInBroadcast.IDS)
        .leftOuterJoin(oldVertexWithNewId)
        .where(new SourceId<>())
        .equalTo(new LeftElementId<>())
        .with(new FlatJoinSourceEdgeReference(true))
        .leftOuterJoin(oldVertexWithNewId)
        .where(new TargetId<>())
        .equalTo(new LeftElementId<>())
        .with(new FlatJoinSourceEdgeReference(false))
        .groupBy(new Id<>())
        .reduceGroup(new AddNewIdToDuplicatedEdge())
        .map(new MapFunctionAddGraphElementToGraph2<>(fusedGraphId));

    return searchGraph.getFactory().fromDataSets(fusedGraphHead, resultVertices, resultEdges);
}
Also used : EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) MapVertexToPairWithGraphId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVertexToPairWithGraphId) IdNotInBroadcast(org.gradoop.flink.model.impl.functions.epgm.IdNotInBroadcast) LeftSide(org.gradoop.flink.model.impl.functions.utils.LeftSide) Value1Of2(org.gradoop.flink.model.impl.functions.tuple.Value1Of2) CoGroupAssociateOldVerticesWithNewIds(org.gradoop.flink.model.impl.operators.fusion.functions.CoGroupAssociateOldVerticesWithNewIds) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) TargetId(org.gradoop.flink.model.impl.functions.epgm.TargetId) GradoopId(org.gradoop.common.model.impl.id.GradoopId) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) CoGroupGraphHeadToVertex(org.gradoop.flink.model.impl.operators.fusion.functions.CoGroupGraphHeadToVertex) FlatJoinSourceEdgeReference(org.gradoop.flink.model.impl.operators.fusion.functions.FlatJoinSourceEdgeReference) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) LeftElementId(org.gradoop.flink.model.impl.operators.fusion.functions.LeftElementId) Id(org.gradoop.flink.model.impl.functions.epgm.Id) MapVerticesAsTuplesWithNullId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVerticesAsTuplesWithNullId) MapVertexToPairWithGraphId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVertexToPairWithGraphId) GradoopId(org.gradoop.common.model.impl.id.GradoopId) TargetId(org.gradoop.flink.model.impl.functions.epgm.TargetId) MapVerticesAsTuplesWithNullId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVerticesAsTuplesWithNullId) MapGraphHeadForNewGraph(org.gradoop.flink.model.impl.operators.fusion.functions.MapGraphHeadForNewGraph)

Example 3 with Id

use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.

In class ApplyAggregation, method executeForGVELayout.

/**
 * Applies the configured aggregate functions to the given collection.
 *
 * <p>Vertex and edge aggregates are computed against the ids of the graph heads,
 * unioned, grouped by tuple field 0 and combined. The combined values are then
 * co-grouped with the graph heads by id and handed to {@code SetAggregateProperties}.
 * Vertices and edges of the collection are passed through unchanged.
 *
 * @param collection input graph collection
 * @return collection built from the updated graph heads and the original vertices and edges
 */
@Override
public GC executeForGVELayout(GC collection) {
    DataSet<G> originalHeads = collection.getGraphHeads();
    DataSet<GradoopId> headIds = originalHeads.map(new Id<>());

    // Partial vertex and edge aggregates, combined per group on field 0.
    DataSet<Tuple2<GradoopId, Map<String, PropertyValue>>> combinedAggregates =
        aggregateVertices(collection.getVertices(), headIds)
            .union(aggregateEdges(collection.getEdges(), headIds))
            .groupBy(0)
            .reduceGroup(new CombinePartitionApplyAggregates(aggregateFunctions));

    // Match the combined aggregates with their graph head by id.
    DataSet<G> aggregatedHeads = originalHeads
        .coGroup(combinedAggregates)
        .where(new Id<>())
        .equalTo(0)
        .with(new SetAggregateProperties<>(aggregateFunctions));

    return collection.getFactory()
        .fromDataSets(aggregatedHeads, collection.getVertices(), collection.getEdges());
}
Also used : CombinePartitionApplyAggregates(org.gradoop.flink.model.impl.operators.aggregation.functions.CombinePartitionApplyAggregates) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SetAggregateProperties(org.gradoop.flink.model.impl.operators.aggregation.functions.SetAggregateProperties) PropertyValue(org.gradoop.common.model.impl.properties.PropertyValue) Id(org.gradoop.flink.model.impl.functions.epgm.Id) GradoopId(org.gradoop.common.model.impl.id.GradoopId) GradoopId(org.gradoop.common.model.impl.id.GradoopId)

Example 4 with Id

use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.

In class TransactionalPatternMatching, method findEmbeddings.

/**
 * Runs the matching algorithm on the candidate graphs and assembles the found
 * embeddings into a new graph collection.
 *
 * <p>One graph head is created per embedding tuple. The vertices and edges of
 * {@code collection} that take part in an embedding are joined by id with the
 * graph ids collected for them and passed to {@code AddGraphsToElements}.
 *
 * @param collection input graph collection
 * @param graphs graphs with candidates of their elements
 * @return collection of found embeddings
 */
private GC findEmbeddings(GC collection, DataSet<GraphWithCandidates> graphs) {
    // Step 1: run the matching algorithm.
    DataSet<Tuple4<GradoopId, GradoopId, GradoopIdSet, GradoopIdSet>> foundEmbeddings =
        graphs.flatMap(new FindEmbeddings(algorithm, query));

    // Step 2: create the new graph heads from tuple fields 0 and 1.
    DataSet<G> embeddingHeads = foundEmbeddings
        .map(new Project4To0And1<>())
        .map(new InitGraphHeadWithLineage<>(collection.getFactory().getGraphHeadFactory()));

    // Step 3: update the vertex graphs.
    DataSet<Tuple2<GradoopId, GradoopIdSet>> graphIdsPerVertex = foundEmbeddings
        .map(new Project4To0And2AndSwitch<>())
        .flatMap(new ExpandFirstField<>())
        .groupBy(0)
        .reduceGroup(new MergeSecondField<>());
    DataSet<V> embeddingVertices = graphIdsPerVertex
        .join(collection.getVertices())
        .where(0)
        .equalTo(new Id<>())
        .with(new AddGraphsToElements<>());

    // Step 4: update the edge graphs.
    DataSet<Tuple2<GradoopId, GradoopIdSet>> graphIdsPerEdge = foundEmbeddings
        .map(new Project4To0And3AndSwitch<>())
        .flatMap(new ExpandFirstField<>())
        .groupBy(0)
        .reduceGroup(new MergeSecondField<>());
    DataSet<E> embeddingEdges = graphIdsPerEdge
        .join(collection.getEdges())
        .where(0)
        .equalTo(new Id<>())
        .with(new AddGraphsToElements<>());

    return collection.getFactory()
        .fromDataSets(embeddingHeads, embeddingVertices, embeddingEdges);
}
Also used : Project4To0And3AndSwitch(org.gradoop.flink.model.impl.operators.matching.transactional.function.Project4To0And3AndSwitch) FindEmbeddings(org.gradoop.flink.model.impl.operators.matching.transactional.function.FindEmbeddings) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Project4To0And1(org.gradoop.flink.model.impl.functions.tuple.Project4To0And1) Id(org.gradoop.flink.model.impl.functions.epgm.Id) GradoopId(org.gradoop.common.model.impl.id.GradoopId) Project4To0And2AndSwitch(org.gradoop.flink.model.impl.operators.matching.transactional.function.Project4To0And2AndSwitch)

Example 5 with Id

use of org.gradoop.flink.model.impl.functions.epgm.Id in project gradoop by dbs-leipzig.

In class RandomWalkSampling, method sample.

/**
 * Samples the given graph based on the result of {@code KRandomJumpGellyVCI}.
 *
 * <p>Vertices of the Gelly result that pass the
 * {@code SamplingConstants.PROPERTY_KEY_SAMPLED} property filter form the sampled
 * vertex set. Edges of the input graph are joined with those vertices on their
 * source id and then on tuple field 1, filtered with
 * {@code EdgesWithSampledVerticesFilter(Neighborhood.BOTH)} and projected back to
 * the edge (tuple field 0).
 *
 * @param graph graph to sample from
 * @return graph built from the sampled vertices and edges
 */
@Override
protected LogicalGraph sample(LogicalGraph graph) {
    LogicalGraph randomWalkResult = graph.callForGraph(
        new KRandomJumpGellyVCI<>(numberOfStartVertices, maxIteration, jumpProbability, sampleSize));

    // Vertices that pass the "sampled" property filter.
    DataSet<EPGMVertex> visitedVertices = randomWalkResult.getVertices()
        .filter(new ByProperty<>(SamplingConstants.PROPERTY_KEY_SAMPLED));

    // Edges are joined with the sampled vertices once per endpoint, then filtered.
    DataSet<EPGMEdge> retainedEdges = graph.getEdges()
        .join(visitedVertices)
        .where(new SourceId<>())
        .equalTo(new Id<>())
        .with(new EdgeSourceVertexJoin(SamplingConstants.PROPERTY_KEY_SAMPLED))
        .join(visitedVertices)
        .where(1)
        .equalTo(new Id<>())
        .with(new EdgeTargetVertexJoin(SamplingConstants.PROPERTY_KEY_SAMPLED))
        .filter(new EdgesWithSampledVerticesFilter(Neighborhood.BOTH))
        .map(new Value0Of3<>());

    return graph.getFactory().fromDataSets(visitedVertices, retainedEdges);
}
Also used : EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) EdgeTargetVertexJoin(org.gradoop.flink.model.impl.operators.sampling.functions.EdgeTargetVertexJoin) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) EdgesWithSampledVerticesFilter(org.gradoop.flink.model.impl.operators.sampling.functions.EdgesWithSampledVerticesFilter) EdgeSourceVertexJoin(org.gradoop.flink.model.impl.operators.sampling.functions.EdgeSourceVertexJoin) LogicalGraph(org.gradoop.flink.model.impl.epgm.LogicalGraph) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) Id(org.gradoop.flink.model.impl.functions.epgm.Id)

Aggregations

Id (org.gradoop.flink.model.impl.functions.epgm.Id)17 GradoopId (org.gradoop.common.model.impl.id.GradoopId)11 EPGMEdge (org.gradoop.common.model.impl.pojo.EPGMEdge)9 EPGMVertex (org.gradoop.common.model.impl.pojo.EPGMVertex)9 SourceId (org.gradoop.flink.model.impl.functions.epgm.SourceId)8 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)7 LogicalGraph (org.gradoop.flink.model.impl.epgm.LogicalGraph)7 Value0Of2 (org.gradoop.flink.model.impl.functions.tuple.Value0Of2)3 FlinkAsciiGraphLoader (org.gradoop.flink.util.FlinkAsciiGraphLoader)3 Test (org.junit.Test)3 DataSet (org.apache.flink.api.java.DataSet)2 EPGMGraphHead (org.gradoop.common.model.impl.pojo.EPGMGraphHead)2 GraphCollection (org.gradoop.flink.model.impl.epgm.GraphCollection)2 TargetId (org.gradoop.flink.model.impl.functions.epgm.TargetId)2 AddToGraphBroadcast (org.gradoop.flink.model.impl.functions.graphcontainment.AddToGraphBroadcast)2 IOException (java.io.IOException)1 HashSet (java.util.HashSet)1 LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator)1 IterativeDataSet (org.apache.flink.api.java.operators.IterativeDataSet)1 Tuple4 (org.apache.flink.api.java.tuple.Tuple4)1