Search in sources :

Example 1 with EPGMEdge

use of org.gradoop.common.model.impl.pojo.EPGMEdge in project gradoop by dbs-leipzig.

the class CreateCartesianNeighborhoodEdgesTest method testWithEmptyNeighborhood.

/**
 * Feeds a single vertex paired with an empty neighborhood list into the function under test
 * and verifies that no edges come out of it.
 *
 * @throws Exception when the execution in Flink fails.
 */
@Test
public void testWithEmptyNeighborhood() throws Exception {
    EPGMVertex loneVertex = vertexFactory.createVertex();
    List<NeighborhoodVertex> noNeighbors = Collections.emptyList();
    Tuple2<EPGMVertex, List<NeighborhoodVertex>> emptyInput = new Tuple2<>(loneVertex, noNeighbors);
    // Run the function on the one-element input and pull the produced edges back to the client.
    List<EPGMEdge> createdEdges = getExecutionEnvironment()
        .fromElements(emptyInput)
        .flatMap(toTest)
        .collect();
    assertEquals(0, createdEdges.size());
}
Also used : EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) Tuple2(org.apache.flink.api.java.tuple.Tuple2) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) List(java.util.List) Test(org.junit.Test)

Example 2 with EPGMEdge

use of org.gradoop.common.model.impl.pojo.EPGMEdge in project gradoop by dbs-leipzig.

the class CreateEdgesFromTripleTest method testFunction.

/**
 * Test the function by applying it to some tuples.
 * <p>
 * Two input triples (vertex, source id, target id) are expected to yield four edges in total.
 * For every triple the assertions below accept exactly two shapes of edge:
 * <ul>
 *   <li>label {@code "source"}: from the triple's source id to the triple's vertex,</li>
 *   <li>label {@code "target"}: from the triple's vertex to the triple's target id.</li>
 * </ul>
 *
 * @throws Exception when the execution in Flink fails.
 */
@Test
public void testFunction() throws Exception {
    CreateEdgesFromTriple<EPGMVertex, EPGMEdge> function = new CreateEdgesFromTriple<>(getConfig().getLogicalGraphFactory().getEdgeFactory(), "source", "target");
    VertexFactory<EPGMVertex> vertexFactory = getConfig().getLogicalGraphFactory().getVertexFactory();
    EPGMVertex testVertex1 = vertexFactory.createVertex();
    EPGMVertex testVertex2 = vertexFactory.createVertex();
    GradoopId source1 = GradoopId.get();
    GradoopId source2 = GradoopId.get();
    GradoopId target1 = GradoopId.get();
    GradoopId target2 = GradoopId.get();
    Tuple3<EPGMVertex, GradoopId, GradoopId> tuple1 = new Tuple3<>(testVertex1, source1, target1);
    Tuple3<EPGMVertex, GradoopId, GradoopId> tuple2 = new Tuple3<>(testVertex2, source2, target2);
    List<EPGMEdge> result = getExecutionEnvironment().fromElements(tuple1, tuple2).flatMap(function).collect();
    // Check if the correct number of edges were created and if they are distinct.
    assertEquals(4, result.size());
    // By id. The ids must be de-duplicated before counting; without distinct() this assertion
    // would merely repeat the size check above and could never detect duplicate edge ids.
    assertEquals(4, result.stream().map(EPGMElement::getId).distinct().count());
    // By source and target id.
    assertEquals(4, result.stream().map(e -> Tuple2.of(e.getSourceId(), e.getTargetId())).distinct().count());
    // Finally check the data of the edges.
    for (EPGMEdge resultEdge : result) {
        if (resultEdge.getLabel().equals("source")) {
            // "source" edges must start at one of the given source ids and end at the matching vertex.
            if (resultEdge.getSourceId().equals(source1)) {
                assertEquals(testVertex1.getId(), resultEdge.getTargetId());
            } else if (resultEdge.getSourceId().equals(source2)) {
                assertEquals(testVertex2.getId(), resultEdge.getTargetId());
            } else {
                fail("EPGMEdge with invalid source ID created.");
            }
        } else if (resultEdge.getLabel().equals("target")) {
            // "target" edges must start at one of the test vertices and end at the matching target id.
            if (resultEdge.getSourceId().equals(testVertex1.getId())) {
                assertEquals(target1, resultEdge.getTargetId());
            } else if (resultEdge.getSourceId().equals(testVertex2.getId())) {
                assertEquals(target2, resultEdge.getTargetId());
            } else {
                fail("EPGMEdge with invalid source ID created.");
            }
        } else {
            fail("EPGMEdge with invalid label created.");
        }
    }
}
Also used : List(java.util.List) EPGMElement(org.gradoop.common.model.impl.pojo.EPGMElement) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Tuple2(org.apache.flink.api.java.tuple.Tuple2) VertexFactory(org.gradoop.common.model.api.entities.VertexFactory) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) GradoopFlinkTestBase(org.gradoop.flink.model.GradoopFlinkTestBase) Test(org.junit.Test) GradoopId(org.gradoop.common.model.impl.id.GradoopId) Assert(org.junit.Assert) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) EPGMElement(org.gradoop.common.model.impl.pojo.EPGMElement) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) Tuple3(org.apache.flink.api.java.tuple.Tuple3) GradoopId(org.gradoop.common.model.impl.id.GradoopId) Test(org.junit.Test)

Example 3 with EPGMEdge

use of org.gradoop.common.model.impl.pojo.EPGMEdge in project gradoop by dbs-leipzig.

the class ExtractPropertyFromVertexTest method testForEdgeDirection.

/**
 * Shared helper that runs the property extraction for one edge direction and validates the
 * edges labelled {@code "newLabel"} in the resulting graph.
 *
 * @param graph     The input graph for the tests.
 * @param direction The edge direction the graph is tested for.
 * @throws Exception Is thrown if the process can't be executed properly.
 */
private void testForEdgeDirection(LogicalGraph graph, EdgeDirection direction) throws Exception {
    Set<String> cities = new HashSet<>(Arrays.asList("Dresden", "Berlin", "Leipzig"));
    Set<String> persons = new HashSet<>(Arrays.asList("Eve", "Alice", "Frank", "Dave", "Bob", "Carol"));

    UnaryGraphToGraphOperator extract = new ExtractPropertyFromVertex("Person", "city", "City", "name", direction, "newLabel");
    LogicalGraph extractedGraph = graph.callForGraph(extract);

    // Twice as many edges are expected when edges are created in both directions.
    long expectedEdgeCount;
    if (direction.equals(EdgeDirection.BIDIRECTIONAL)) {
        expectedEdgeCount = 12;
    } else {
        expectedEdgeCount = 6;
    }
    Assert.assertEquals(expectedEdgeCount, extractedGraph.getEdgesByLabel("newLabel").count());

    // Register local sinks for the relevant vertices and the new edges, then run the job once.
    List<EPGMVertex> vertices = new ArrayList<>();
    extractedGraph.getVertices()
        .filter(new LabelIsIn<>("Person", "City"))
        .output(new LocalCollectionOutputFormat<>(vertices));
    List<EPGMEdge> newEdges = new ArrayList<>();
    extractedGraph.getEdgesByLabel("newLabel")
        .output(new LocalCollectionOutputFormat<>(newEdges));
    getConfig().getExecutionEnvironment().execute();

    // Look-up table from vertex id to the value of its "name" property.
    Map<GradoopId, String> namesById = new HashMap<>();
    for (EPGMVertex vertex : vertices) {
        namesById.put(vertex.getId(), vertex.getPropertyValue("name").getString());
    }

    for (EPGMEdge edge : newEdges) {
        String sourceName = namesById.get(edge.getSourceId());
        String targetName = namesById.get(edge.getTargetId());
        if (direction.equals(EdgeDirection.ORIGIN_TO_NEWVERTEX)) {
            // Person -> City
            String message = "source: " + sourceName + " | target: " + targetName + " | edge direction: " + direction.name();
            Assert.assertTrue(message, persons.contains(sourceName) && cities.contains(targetName));
        } else if (direction.equals(EdgeDirection.NEWVERTEX_TO_ORIGIN)) {
            // City -> Person
            String message = "source: " + sourceName + " | target: " + targetName + " | edge direction: " + direction.name();
            Assert.assertTrue(message, cities.contains(sourceName) && persons.contains(targetName));
        } else if (direction.equals(EdgeDirection.BIDIRECTIONAL)) {
            // Either orientation is fine as long as one end is a city and one end is a person.
            String message = "vertex name 1: " + sourceName + " | vertex name 2: " + targetName + " | edge direction: " + direction.name();
            boolean touchesCity = cities.contains(sourceName) || cities.contains(targetName);
            boolean touchesPerson = persons.contains(sourceName) || persons.contains(targetName);
            Assert.assertTrue(message, touchesCity && touchesPerson);
        }
    }
}
Also used : HashMap(java.util.HashMap) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) ArrayList(java.util.ArrayList) GradoopId(org.gradoop.common.model.impl.id.GradoopId) LabelIsIn(org.gradoop.flink.model.impl.functions.epgm.LabelIsIn) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) UnaryGraphToGraphOperator(org.gradoop.flink.model.api.operators.UnaryGraphToGraphOperator) LogicalGraph(org.gradoop.flink.model.impl.epgm.LogicalGraph) HashSet(java.util.HashSet)

Example 4 with EPGMEdge

use of org.gradoop.common.model.impl.pojo.EPGMEdge in project gradoop by dbs-leipzig.

the class IndexedCSVDataSource method getGraphCollection.

/**
 * Builds a graph collection from per-label CSV files.
 * <p>
 * The metadata is read twice: once locally (to enumerate the graph, vertex and edge labels)
 * and once as a distributed data set that is broadcast to the line-parsing map functions.
 * For every label one data set is created from that label's CSV path, and the data sets are
 * handed to the collection factory as maps keyed by label.
 *
 * @return the graph collection created by {@code fromIndexedDataSets} from the per-label data sets
 * @throws IOException declared by the signature; presumably raised while reading the
 *                     metadata -- confirm against {@code CSVMetaDataSource}
 */
@Override
public GraphCollection getGraphCollection() throws IOException {
    CSVMetaDataSource source = new CSVMetaDataSource();
    // Local copy of the metadata: used on the client to enumerate the labels below.
    CSVMetaData metaData = source.readLocal(getMetaDataPath(), hdfsConfig);
    // Distributed copy of the metadata: attached as broadcast set to every parsing map function.
    DataSet<Tuple3<String, String, String>> metaDataBroadcast = source.readDistributed(getMetaDataPath(), getConfig());
    ExecutionEnvironment env = getConfig().getExecutionEnvironment();
    GraphCollectionFactory factory = getConfig().getGraphCollectionFactory();
    // One data set of graph heads per graph label: read the label's file, parse each line,
    // and keep only the elements that actually carry this label.
    Map<String, DataSet<EPGMGraphHead>> graphHeads = metaData.getGraphLabels().stream().map(label -> Tuple2.of(label, env.readTextFile(getGraphHeadCSVPath(label)).map(new CSVLineToGraphHead(factory.getGraphHeadFactory())).withBroadcastSet(metaDataBroadcast, BC_METADATA).filter(graphHead -> graphHead.getLabel().equals(label)))).collect(Collectors.toMap(t -> t.f0, t -> t.f1));
    // Same scheme for the vertices, keyed by vertex label.
    Map<String, DataSet<EPGMVertex>> vertices = metaData.getVertexLabels().stream().map(label -> Tuple2.of(label, env.readTextFile(getVertexCSVPath(label)).map(new CSVLineToVertex(factory.getVertexFactory())).withBroadcastSet(metaDataBroadcast, BC_METADATA).filter(vertex -> vertex.getLabel().equals(label)))).collect(Collectors.toMap(t -> t.f0, t -> t.f1));
    // Same scheme for the edges, keyed by edge label.
    Map<String, DataSet<EPGMEdge>> edges = metaData.getEdgeLabels().stream().map(label -> Tuple2.of(label, env.readTextFile(getEdgeCSVPath(label)).map(new CSVLineToEdge(factory.getEdgeFactory())).withBroadcastSet(metaDataBroadcast, BC_METADATA).filter(edge -> edge.getLabel().equals(label)))).collect(Collectors.toMap(t -> t.f0, t -> t.f1));
    return factory.fromIndexedDataSets(graphHeads, vertices, edges);
}
Also used : Tuple3(org.apache.flink.api.java.tuple.Tuple3) Tuple2(org.apache.flink.api.java.tuple.Tuple2) GradoopFlinkConfig(org.gradoop.flink.util.GradoopFlinkConfig) IOException(java.io.IOException) GraphCollection(org.gradoop.flink.model.impl.epgm.GraphCollection) CSVLineToGraphHead(org.gradoop.flink.io.impl.csv.functions.CSVLineToGraphHead) Collectors(java.util.stream.Collectors) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) DataSource(org.gradoop.flink.io.api.DataSource) Objects(java.util.Objects) DataSet(org.apache.flink.api.java.DataSet) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) CSVMetaDataSource(org.gradoop.flink.io.impl.csv.metadata.CSVMetaDataSource) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) CSVBase(org.gradoop.flink.io.impl.csv.CSVBase) CSVLineToEdge(org.gradoop.flink.io.impl.csv.functions.CSVLineToEdge) CSVMetaData(org.gradoop.flink.io.impl.csv.metadata.CSVMetaData) LogicalGraph(org.gradoop.flink.model.impl.epgm.LogicalGraph) CSVLineToVertex(org.gradoop.flink.io.impl.csv.functions.CSVLineToVertex) GraphCollectionFactory(org.gradoop.flink.model.impl.epgm.GraphCollectionFactory) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) CSVLineToGraphHead(org.gradoop.flink.io.impl.csv.functions.CSVLineToGraphHead) DataSet(org.apache.flink.api.java.DataSet) CSVLineToEdge(org.gradoop.flink.io.impl.csv.functions.CSVLineToEdge) CSVMetaData(org.gradoop.flink.io.impl.csv.metadata.CSVMetaData) Tuple3(org.apache.flink.api.java.tuple.Tuple3) CSVLineToVertex(org.gradoop.flink.io.impl.csv.functions.CSVLineToVertex) CSVMetaDataSource(org.gradoop.flink.io.impl.csv.metadata.CSVMetaDataSource) GraphCollectionFactory(org.gradoop.flink.model.impl.epgm.GraphCollectionFactory)

Example 5 with EPGMEdge

use of org.gradoop.common.model.impl.pojo.EPGMEdge in project gradoop by dbs-leipzig.

the class VertexFusion method execute.

/**
 * Fusing the already-combined sources.
 * <p>
 * The result graph gets a freshly generated graph id. Its graph head is derived from the
 * search graph's head, and its vertices and edges are assembled in the three phases marked
 * in the body before being passed to the search graph's factory.
 *
 * @param searchGraph   Logical Graph defining the data lake
 * @param graphPatterns Collection of elements representing which vertices will be merged into
 *                      a vertex
 * @return              A single merged graph
 */
public LogicalGraph execute(LogicalGraph searchGraph, GraphCollection graphPatterns) {
    // Missing in the theoretical definition: creating a new header
    GradoopId newGraphid = GradoopId.get();
    DataSet<EPGMGraphHead> gh = searchGraph.getGraphHead().map(new MapGraphHeadForNewGraph(newGraphid));
    // Ids of all vertices and edges occurring in the patterns; used as broadcast sets below.
    DataSet<GradoopId> patternVertexIds = graphPatterns.getVertices().map(new Id<>());
    DataSet<GradoopId> patternEdgeIds = graphPatterns.getEdges().map(new Id<>());
    // PHASE 1: Induced Subgraphs
    // Associate each vertex to its graph id
    DataSet<Tuple2<EPGMVertex, GradoopId>> patternVerticesWithGraphIDs = graphPatterns.getVertices().coGroup(searchGraph.getVertices()).where(new Id<>()).equalTo(new Id<>()).with(new LeftSide<>()).flatMap(new MapVertexToPairWithGraphId());
    // Associate each gid in hypervertices.H to the merged vertices
    DataSet<Tuple2<EPGMVertex, GradoopId>> mergedVertices = graphPatterns.getGraphHeads().map(new CoGroupGraphHeadToVertex());
    // PHASE 2: Recreating the vertices
    // Search-graph vertices filtered against the pattern vertex ids
    // (presumably those NOT contained in any pattern, going by IdNotInBroadcast -- confirm).
    DataSet<EPGMVertex> vi = searchGraph.getVertices().filter(new IdNotInBroadcast<>()).withBroadcastSet(patternVertexIds, IdNotInBroadcast.IDS);
    // Pairs of (vertex, id) used in phase 3 to redirect edge endpoints: pattern vertices are
    // co-grouped with the merged vertices on the second tuple field, and the remaining
    // vertices are added via MapVerticesAsTuplesWithNullId.
    DataSet<Tuple2<EPGMVertex, GradoopId>> idJoin = patternVerticesWithGraphIDs.coGroup(mergedVertices).where(new Value1Of2<>()).equalTo(new Value1Of2<>()).with(new CoGroupAssociateOldVerticesWithNewIds()).union(vi.map(new MapVerticesAsTuplesWithNullId()));
    // Vertices of the result graph: the merged vertices united with the filtered vertices,
    // each passed through MapFunctionAddGraphElementToGraph2 with the new graph id.
    DataSet<EPGMVertex> vToRet = mergedVertices.coGroup(patternVerticesWithGraphIDs).where(new Value1Of2<>()).equalTo(new Value1Of2<>()).with(new LeftSide<>()).map(new Value0Of2<>()).union(vi).map(new MapFunctionAddGraphElementToGraph2<>(newGraphid));
    // PHASE 3: Recreating the edges
    // Search-graph edges filtered against the pattern edge ids are joined with idJoin twice,
    // first on the source id (flag true) and then on the target id (flag false); afterwards
    // they are grouped by edge id, reduced by AddNewIdToDuplicatedEdge and passed through
    // MapFunctionAddGraphElementToGraph2 with the new graph id.
    DataSet<EPGMEdge> edges = searchGraph.getEdges().filter(new IdNotInBroadcast<>()).withBroadcastSet(patternEdgeIds, IdNotInBroadcast.IDS).leftOuterJoin(idJoin).where(new SourceId<>()).equalTo(new LeftElementId<>()).with(new FlatJoinSourceEdgeReference(true)).leftOuterJoin(idJoin).where(new TargetId<>()).equalTo(new LeftElementId<>()).with(new FlatJoinSourceEdgeReference(false)).groupBy(new Id<>()).reduceGroup(new AddNewIdToDuplicatedEdge()).map(new MapFunctionAddGraphElementToGraph2<>(newGraphid));
    return searchGraph.getFactory().fromDataSets(gh, vToRet, edges);
}
Also used : EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) MapVertexToPairWithGraphId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVertexToPairWithGraphId) IdNotInBroadcast(org.gradoop.flink.model.impl.functions.epgm.IdNotInBroadcast) LeftSide(org.gradoop.flink.model.impl.functions.utils.LeftSide) Value1Of2(org.gradoop.flink.model.impl.functions.tuple.Value1Of2) CoGroupAssociateOldVerticesWithNewIds(org.gradoop.flink.model.impl.operators.fusion.functions.CoGroupAssociateOldVerticesWithNewIds) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) TargetId(org.gradoop.flink.model.impl.functions.epgm.TargetId) GradoopId(org.gradoop.common.model.impl.id.GradoopId) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) CoGroupGraphHeadToVertex(org.gradoop.flink.model.impl.operators.fusion.functions.CoGroupGraphHeadToVertex) FlatJoinSourceEdgeReference(org.gradoop.flink.model.impl.operators.fusion.functions.FlatJoinSourceEdgeReference) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourceId(org.gradoop.flink.model.impl.functions.epgm.SourceId) LeftElementId(org.gradoop.flink.model.impl.operators.fusion.functions.LeftElementId) Id(org.gradoop.flink.model.impl.functions.epgm.Id) MapVerticesAsTuplesWithNullId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVerticesAsTuplesWithNullId) MapVertexToPairWithGraphId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVertexToPairWithGraphId) GradoopId(org.gradoop.common.model.impl.id.GradoopId) TargetId(org.gradoop.flink.model.impl.functions.epgm.TargetId) MapVerticesAsTuplesWithNullId(org.gradoop.flink.model.impl.operators.fusion.functions.MapVerticesAsTuplesWithNullId) MapGraphHeadForNewGraph(org.gradoop.flink.model.impl.operators.fusion.functions.MapGraphHeadForNewGraph)

Aggregations

EPGMEdge (org.gradoop.common.model.impl.pojo.EPGMEdge)174 EPGMVertex (org.gradoop.common.model.impl.pojo.EPGMVertex)136 EPGMGraphHead (org.gradoop.common.model.impl.pojo.EPGMGraphHead)89 Test (org.junit.Test)75 LogicalGraph (org.gradoop.flink.model.impl.epgm.LogicalGraph)41 Test (org.testng.annotations.Test)39 GradoopId (org.gradoop.common.model.impl.id.GradoopId)36 FlinkAsciiGraphLoader (org.gradoop.flink.util.FlinkAsciiGraphLoader)35 GraphCollection (org.gradoop.flink.model.impl.epgm.GraphCollection)30 ArrayList (java.util.ArrayList)21 GradoopIdSet (org.gradoop.common.model.impl.id.GradoopIdSet)21 Embedding (org.gradoop.flink.model.impl.operators.matching.single.cypher.pojos.Embedding)21 List (java.util.List)19 Properties (org.gradoop.common.model.impl.properties.Properties)18 CNF (org.gradoop.flink.model.impl.operators.matching.common.query.predicates.CNF)18 PhysicalOperatorTest (org.gradoop.flink.model.impl.operators.matching.single.cypher.operators.PhysicalOperatorTest)18 Collectors (java.util.stream.Collectors)15 EPGMEdgeFactory (org.gradoop.common.model.impl.pojo.EPGMEdgeFactory)15 PropertyValue (org.gradoop.common.model.impl.properties.PropertyValue)15 HBaseEPGMStore (org.gradoop.storage.hbase.impl.HBaseEPGMStore)14