Search in sources :

Example 1 with EPGMGraphHead

use of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig.

the class IndexedCSVDataSource method getGraphCollection.

/**
 * Assembles a graph collection from per-label CSV inputs.
 *
 * <p>The metadata is loaded twice from {@code getMetaDataPath()}: once via
 * {@code readLocal} to enumerate the graph, vertex and edge labels, and once via
 * {@code readDistributed} to obtain a dataset that is attached to every parsing
 * function as the broadcast set {@code BC_METADATA}. For each label the matching
 * CSV path is read as text, parsed line by line, and restricted to elements whose
 * label equals the label the path was derived from.
 *
 * @return the collection built from the label-indexed graph head, vertex and edge datasets
 * @throws IOException declared by the signature; presumably raised while reading the
 *                     metadata (not visible in this snippet)
 */
@Override
public GraphCollection getGraphCollection() throws IOException {
    CSVMetaDataSource metaDataSource = new CSVMetaDataSource();
    CSVMetaData localMetaData = metaDataSource.readLocal(getMetaDataPath(), hdfsConfig);
    DataSet<Tuple3<String, String, String>> broadcastMetaData =
        metaDataSource.readDistributed(getMetaDataPath(), getConfig());

    ExecutionEnvironment environment = getConfig().getExecutionEnvironment();
    GraphCollectionFactory collectionFactory = getConfig().getGraphCollectionFactory();

    // One dataset per graph label, keyed by that label.
    Map<String, DataSet<EPGMGraphHead>> graphHeadsByLabel = localMetaData.getGraphLabels().stream()
        .collect(Collectors.toMap(
            label -> label,
            label -> environment.readTextFile(getGraphHeadCSVPath(label))
                .map(new CSVLineToGraphHead(collectionFactory.getGraphHeadFactory()))
                .withBroadcastSet(broadcastMetaData, BC_METADATA)
                .filter(graphHead -> graphHead.getLabel().equals(label))));

    // One dataset per vertex label, keyed by that label.
    Map<String, DataSet<EPGMVertex>> verticesByLabel = localMetaData.getVertexLabels().stream()
        .collect(Collectors.toMap(
            label -> label,
            label -> environment.readTextFile(getVertexCSVPath(label))
                .map(new CSVLineToVertex(collectionFactory.getVertexFactory()))
                .withBroadcastSet(broadcastMetaData, BC_METADATA)
                .filter(vertex -> vertex.getLabel().equals(label))));

    // One dataset per edge label, keyed by that label.
    Map<String, DataSet<EPGMEdge>> edgesByLabel = localMetaData.getEdgeLabels().stream()
        .collect(Collectors.toMap(
            label -> label,
            label -> environment.readTextFile(getEdgeCSVPath(label))
                .map(new CSVLineToEdge(collectionFactory.getEdgeFactory()))
                .withBroadcastSet(broadcastMetaData, BC_METADATA)
                .filter(edge -> edge.getLabel().equals(label))));

    return collectionFactory.fromIndexedDataSets(graphHeadsByLabel, verticesByLabel, edgesByLabel);
}
Also used : Tuple3(org.apache.flink.api.java.tuple.Tuple3) Tuple2(org.apache.flink.api.java.tuple.Tuple2) GradoopFlinkConfig(org.gradoop.flink.util.GradoopFlinkConfig) IOException(java.io.IOException) GraphCollection(org.gradoop.flink.model.impl.epgm.GraphCollection) CSVLineToGraphHead(org.gradoop.flink.io.impl.csv.functions.CSVLineToGraphHead) Collectors(java.util.stream.Collectors) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) DataSource(org.gradoop.flink.io.api.DataSource) Objects(java.util.Objects) DataSet(org.apache.flink.api.java.DataSet) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) CSVMetaDataSource(org.gradoop.flink.io.impl.csv.metadata.CSVMetaDataSource) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) CSVBase(org.gradoop.flink.io.impl.csv.CSVBase) CSVLineToEdge(org.gradoop.flink.io.impl.csv.functions.CSVLineToEdge) CSVMetaData(org.gradoop.flink.io.impl.csv.metadata.CSVMetaData) LogicalGraph(org.gradoop.flink.model.impl.epgm.LogicalGraph) CSVLineToVertex(org.gradoop.flink.io.impl.csv.functions.CSVLineToVertex) GraphCollectionFactory(org.gradoop.flink.model.impl.epgm.GraphCollectionFactory) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) CSVLineToGraphHead(org.gradoop.flink.io.impl.csv.functions.CSVLineToGraphHead) DataSet(org.apache.flink.api.java.DataSet) CSVLineToEdge(org.gradoop.flink.io.impl.csv.functions.CSVLineToEdge) CSVMetaData(org.gradoop.flink.io.impl.csv.metadata.CSVMetaData) Tuple3(org.apache.flink.api.java.tuple.Tuple3) CSVLineToVertex(org.gradoop.flink.io.impl.csv.functions.CSVLineToVertex) CSVMetaDataSource(org.gradoop.flink.io.impl.csv.metadata.CSVMetaDataSource) GraphCollectionFactory(org.gradoop.flink.model.impl.epgm.GraphCollectionFactory)

Example 2 with EPGMGraphHead

use of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig.

the class VertexFusion method execute.

/**
 * Fuses the vertices described by the given patterns inside the search graph.
 *
 * <p>A fresh graph id is generated and the search graph's head is mapped onto it.
 * Vertices and edges of the search graph that pass the {@code IdNotInBroadcast}
 * filter against the pattern ids are carried over; the pattern graph heads are
 * mapped to vertices via {@code CoGroupGraphHeadToVertex}, and the remaining edges
 * are joined against the old-vertex/new-id association on both source and target.
 *
 * @param searchGraph   Logical Graph defining the data lake
 * @param graphPatterns Collection of elements representing which vertices will be merged into
 *                      a vertex
 * @return              A single merged graph
 */
public LogicalGraph execute(LogicalGraph searchGraph, GraphCollection graphPatterns) {
    // Not part of the theoretical definition: the result needs a new graph head
    GradoopId fusedGraphId = GradoopId.get();
    DataSet<EPGMGraphHead> fusedGraphHead = searchGraph.getGraphHead()
        .map(new MapGraphHeadForNewGraph(fusedGraphId));

    DataSet<GradoopId> patternVertexIds = graphPatterns.getVertices().map(new Id<>());
    DataSet<GradoopId> patternEdgeIds = graphPatterns.getEdges().map(new Id<>());

    // PHASE 1: Induced Subgraphs
    // Pair every pattern vertex with its graph id
    DataSet<Tuple2<EPGMVertex, GradoopId>> patternVerticesByGraphId = graphPatterns.getVertices()
        .coGroup(searchGraph.getVertices())
        .where(new Id<>())
        .equalTo(new Id<>())
        .with(new LeftSide<>())
        .flatMap(new MapVertexToPairWithGraphId());

    // Pair every pattern graph head with the vertex that replaces its pattern
    DataSet<Tuple2<EPGMVertex, GradoopId>> fusedVerticesByGraphId = graphPatterns.getGraphHeads()
        .map(new CoGroupGraphHeadToVertex());

    // PHASE 2: Recreating the vertices
    DataSet<EPGMVertex> untouchedVertices = searchGraph.getVertices()
        .filter(new IdNotInBroadcast<>())
        .withBroadcastSet(patternVertexIds, IdNotInBroadcast.IDS);

    DataSet<Tuple2<EPGMVertex, GradoopId>> oldVertexToNewId = patternVerticesByGraphId
        .coGroup(fusedVerticesByGraphId)
        .where(new Value1Of2<>())
        .equalTo(new Value1Of2<>())
        .with(new CoGroupAssociateOldVerticesWithNewIds())
        .union(untouchedVertices.map(new MapVerticesAsTuplesWithNullId()));

    DataSet<EPGMVertex> resultVertices = fusedVerticesByGraphId
        .coGroup(patternVerticesByGraphId)
        .where(new Value1Of2<>())
        .equalTo(new Value1Of2<>())
        .with(new LeftSide<>())
        .map(new Value0Of2<>())
        .union(untouchedVertices)
        .map(new MapFunctionAddGraphElementToGraph2<>(fusedGraphId));

    // PHASE 3: Recreating the edges
    DataSet<EPGMEdge> resultEdges = searchGraph.getEdges()
        .filter(new IdNotInBroadcast<>())
        .withBroadcastSet(patternEdgeIds, IdNotInBroadcast.IDS)
        // rewrite the source side
        .leftOuterJoin(oldVertexToNewId)
        .where(new SourceId<>())
        .equalTo(new LeftElementId<>())
        .with(new FlatJoinSourceEdgeReference(true))
        // rewrite the target side
        .leftOuterJoin(oldVertexToNewId)
        .where(new TargetId<>())
        .equalTo(new LeftElementId<>())
        .with(new FlatJoinSourceEdgeReference(false))
        .groupBy(new Id<>())
        .reduceGroup(new AddNewIdToDuplicatedEdge())
        .map(new MapFunctionAddGraphElementToGraph2<>(fusedGraphId));

    return searchGraph.getFactory().fromDataSets(fusedGraphHead, resultVertices, resultEdges);
}
Also used : EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) MapVertexToPairWithGraphId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVertexToPairWithGraphId) IdNotInBroadcast(org.gradoop.flink.model.impl.functions.epgm.IdNotInBroadcast) LeftSide(org.gradoop.flink.model.impl.functions.utils.LeftSide) Value1Of2(org.gradoop.flink.model.impl.functions.tuple.Value1Of2) CoGroupAssociateOldVerticesWithNewIds(org.gradoop.flink.model.impl.operators.fusion.functions.CoGroupAssociateOldVerticesWithNewIds) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) TargetId(org.gradoop.flink.model.impl.functions.epgm.TargetId) GradoopId(org.gradoop.common.model.impl.id.GradoopId) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) CoGroupGraphHeadToVertex(org.gradoop.flink.model.impl.operators.fusion.functions.CoGroupGraphHeadToVertex) FlatJoinSourceEdgeReference(org.gradoop.flink.model.impl.operators.fusion.functions.FlatJoinSourceEdgeReference) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) LeftElementId(org.gradoop.flink.model.impl.operators.fusion.functions.LeftElementId) Id(org.gradoop.flink.model.impl.functions.epgm.Id) MapVerticesAsTuplesWithNullId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVerticesAsTuplesWithNullId) MapVertexToPairWithGraphId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVertexToPairWithGraphId) GradoopId(org.gradoop.common.model.impl.id.GradoopId) TargetId(org.gradoop.flink.model.impl.functions.epgm.TargetId) MapVerticesAsTuplesWithNullId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVerticesAsTuplesWithNullId) MapGraphHeadForNewGraph(org.gradoop.flink.model.impl.operators.fusion.functions.MapGraphHeadForNewGraph)

Example 3 with EPGMGraphHead

use of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig.

the class BaseFactory method createGraphHeadDataSet.

/**
 * Turns a collection of graph heads into a dataset.
 *
 * <p>An empty collection is not handed to {@code fromCollection}; instead a dataset
 * is created from one freshly created graph head and immediately filtered with
 * {@code False}, which is the workaround for dataset creation from an empty collection.
 *
 * @param graphHeads graph heads
 * @return graph head dataset
 */
protected DataSet<EPGMGraphHead> createGraphHeadDataSet(Collection<EPGMGraphHead> graphHeads) {
    ExecutionEnvironment env = getConfig().getExecutionEnvironment();
    if (!graphHeads.isEmpty()) {
        return env.fromCollection(graphHeads);
    }
    // Empty input: seed with a placeholder element and filter it out again.
    return env.fromElements(getGraphHeadFactory().createGraphHead()).filter(new False<>());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) False(org.gradoop.flink.model.impl.functions.bool.False) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead)

Example 4 with EPGMGraphHead

use of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig.

the class RollUp method execute.

/**
 * Runs the rollUp operation on the input graph.
 *
 * <p>One grouping is executed per grouping key combination. The head of every grouped
 * graph receives a property (key from {@code getGraphPropertyKey()}) holding the
 * comma-joined keys of its combination, and the elements of all grouped graphs are
 * unioned into a single collection.
 *
 * @param graph input graph
 * @return graphCollection containing all differently grouped graphs; an empty collection
 *         if no datasets were produced
 */
@Override
public GraphCollection execute(LogicalGraph graph) {
    DataSet<EPGMGraphHead> allGraphHeads = null;
    DataSet<EPGMVertex> allVertices = null;
    DataSet<EPGMEdge> allEdges = null;

    // one grouping per key combination
    for (List<String> keys : getGroupingKeyCombinations()) {
        LogicalGraph grouped = applyGrouping(graph, keys);

        // record the keys used for this grouping on the grouped graph's head
        PropertyValue usedKeys = PropertyValue.create(String.join(",", keys));
        DataSet<EPGMGraphHead> taggedHead = grouped.getGraphHead()
            .map(new SetProperty<>(getGraphPropertyKey(), usedKeys));

        if (allGraphHeads == null || allVertices == null || allEdges == null) {
            // nothing accumulated yet: start with this grouping's datasets
            allGraphHeads = taggedHead;
            allVertices = grouped.getVertices();
            allEdges = grouped.getEdges();
        } else {
            // append this grouping's elements to the accumulated ones
            allGraphHeads = allGraphHeads.union(taggedHead);
            allVertices = allVertices.union(grouped.getVertices());
            allEdges = allEdges.union(grouped.getEdges());
        }
    }

    // The accumulators start out as null and stay that way if the loop never assigned them.
    // Only in that case is the empty collection built.
    if (allGraphHeads == null || allVertices == null || allEdges == null) {
        return graph.getCollectionFactory().createEmptyCollection();
    }
    return graph.getCollectionFactory().fromDataSets(allGraphHeads, allVertices, allEdges);
}
Also used : GraphCollection(org.gradoop.flink.model.impl.epgm.GraphCollection) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) PropertyValue(org.gradoop.common.model.impl.properties.PropertyValue) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) ArrayList(java.util.ArrayList) List(java.util.List) LogicalGraph(org.gradoop.flink.model.impl.epgm.LogicalGraph)

Example 5 with EPGMGraphHead

use of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig.

the class AverageIncomingDegree method execute.

/**
 * Produces a graph whose head is derived from the summed incoming vertex degrees.
 *
 * <p>The graph is first aggregated with {@code VertexCount}. The output of
 * {@code IncomingVertexDegrees} is summed on field 1, crossed with the first graph head,
 * written to the head by {@code AddSumDegreesToGraphHeadCrossFunction} under
 * {@code PROPERTY_KEY_SUM_DEGREES}, and finally passed through
 * {@code CalculateAverageDegree} with {@code PROPERTY_KEY_AVERAGE_INCOMING_DEGREE}.
 *
 * @param graph input graph
 * @return graph with the new graph head and the vertices and edges of the aggregated graph
 */
@Override
public LogicalGraph execute(LogicalGraph graph) {
    LogicalGraph countedGraph = graph.aggregate(new VertexCount());

    DataSet<EPGMGraphHead> headWithAverage = new IncomingVertexDegrees()
        .execute(countedGraph)
        .sum(1)
        .crossWithTiny(countedGraph.getGraphHead().first(1))
        .with(new AddSumDegreesToGraphHeadCrossFunction(
            SamplingEvaluationConstants.PROPERTY_KEY_SUM_DEGREES))
        .map(new CalculateAverageDegree(
            SamplingEvaluationConstants.PROPERTY_KEY_AVERAGE_INCOMING_DEGREE));

    return countedGraph.getFactory()
        .fromDataSets(headWithAverage, countedGraph.getVertices(), countedGraph.getEdges());
}
Also used : CalculateAverageDegree(org.gradoop.flink.model.impl.operators.statistics.functions.CalculateAverageDegree) VertexCount(org.gradoop.flink.model.impl.operators.aggregation.functions.count.VertexCount) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) AddSumDegreesToGraphHeadCrossFunction(org.gradoop.flink.model.impl.operators.statistics.functions.AddSumDegreesToGraphHeadCrossFunction)

Aggregations

EPGMGraphHead (org.gradoop.common.model.impl.pojo.EPGMGraphHead)133 EPGMVertex (org.gradoop.common.model.impl.pojo.EPGMVertex)91 EPGMEdge (org.gradoop.common.model.impl.pojo.EPGMEdge)89 Test (org.junit.Test)55 Test (org.testng.annotations.Test)38 GraphCollection (org.gradoop.flink.model.impl.epgm.GraphCollection)35 LogicalGraph (org.gradoop.flink.model.impl.epgm.LogicalGraph)35 FlinkAsciiGraphLoader (org.gradoop.flink.util.FlinkAsciiGraphLoader)34 ArrayList (java.util.ArrayList)20 GradoopIdSet (org.gradoop.common.model.impl.id.GradoopIdSet)20 PropertyValue (org.gradoop.common.model.impl.properties.PropertyValue)16 Collectors (java.util.stream.Collectors)15 List (java.util.List)14 GradoopId (org.gradoop.common.model.impl.id.GradoopId)14 HBaseEPGMStore (org.gradoop.storage.hbase.impl.HBaseEPGMStore)14 GraphTransaction (org.gradoop.flink.model.impl.layouts.transactional.tuples.GraphTransaction)13 Identifiable (org.gradoop.common.model.api.entities.Identifiable)12 Query (org.gradoop.storage.common.predicate.query.Query)12 GradoopTestUtils.validateElementCollections (org.gradoop.common.GradoopTestUtils.validateElementCollections)11 HBaseDataSource (org.gradoop.storage.hbase.impl.io.HBaseDataSource)11