Usage of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig:
from the class IndexedCSVDataSource, method getGraphCollection.
/**
 * Reads a {@link GraphCollection} from label-indexed CSV files.
 * <p>
 * The meta data is read once locally (to learn the existing labels) and once as a
 * distributed broadcast set (consumed by the CSV line parsers). For every label one
 * dataset is created per element type, yielding label-indexed maps that are handed
 * to the factory.
 *
 * @return the graph collection read from the indexed CSV source
 * @throws IOException if reading the local meta data fails
 */
@Override
public GraphCollection getGraphCollection() throws IOException {
  CSVMetaDataSource metaDataSource = new CSVMetaDataSource();
  CSVMetaData metaData = metaDataSource.readLocal(getMetaDataPath(), hdfsConfig);
  DataSet<Tuple3<String, String, String>> metaDataBroadcast =
    metaDataSource.readDistributed(getMetaDataPath(), getConfig());
  ExecutionEnvironment environment = getConfig().getExecutionEnvironment();
  GraphCollectionFactory collectionFactory = getConfig().getGraphCollectionFactory();

  // One dataset of graph heads per graph label, keyed by that label.
  Map<String, DataSet<EPGMGraphHead>> graphHeads = metaData.getGraphLabels().stream()
    .map(label -> {
      DataSet<EPGMGraphHead> heads = environment.readTextFile(getGraphHeadCSVPath(label))
        .map(new CSVLineToGraphHead(collectionFactory.getGraphHeadFactory()))
        .withBroadcastSet(metaDataBroadcast, BC_METADATA)
        // guard against rows of a different label ending up in this file
        .filter(graphHead -> graphHead.getLabel().equals(label));
      return Tuple2.of(label, heads);
    })
    .collect(Collectors.toMap(pair -> pair.f0, pair -> pair.f1));

  // One dataset of vertices per vertex label, keyed by that label.
  Map<String, DataSet<EPGMVertex>> vertices = metaData.getVertexLabels().stream()
    .map(label -> {
      DataSet<EPGMVertex> labelVertices = environment.readTextFile(getVertexCSVPath(label))
        .map(new CSVLineToVertex(collectionFactory.getVertexFactory()))
        .withBroadcastSet(metaDataBroadcast, BC_METADATA)
        .filter(vertex -> vertex.getLabel().equals(label));
      return Tuple2.of(label, labelVertices);
    })
    .collect(Collectors.toMap(pair -> pair.f0, pair -> pair.f1));

  // One dataset of edges per edge label, keyed by that label.
  Map<String, DataSet<EPGMEdge>> edges = metaData.getEdgeLabels().stream()
    .map(label -> {
      DataSet<EPGMEdge> labelEdges = environment.readTextFile(getEdgeCSVPath(label))
        .map(new CSVLineToEdge(collectionFactory.getEdgeFactory()))
        .withBroadcastSet(metaDataBroadcast, BC_METADATA)
        .filter(edge -> edge.getLabel().equals(label));
      return Tuple2.of(label, labelEdges);
    })
    .collect(Collectors.toMap(pair -> pair.f0, pair -> pair.f1));

  return collectionFactory.fromIndexedDataSets(graphHeads, vertices, edges);
}
Usage of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig:
from the class VertexFusion, method execute.
/**
 * Fuses the already-combined sources: each graph in {@code graphPatterns} is
 * collapsed into a single "merged" vertex inside a copy of the search graph.
 *
 * @param searchGraph Logical Graph defining the data lake
 * @param graphPatterns Collection of elements representing which vertices will be merged into
 * a vertex
 * @return A single merged graph
 */
public LogicalGraph execute(LogicalGraph searchGraph, GraphCollection graphPatterns) {
// Missing in the theoretical definition: creating a new header for the result graph
GradoopId newGraphid = GradoopId.get();
// Derive the result's graph head from the search graph's head, re-keyed to the new id.
DataSet<EPGMGraphHead> gh = searchGraph.getGraphHead().map(new MapGraphHeadForNewGraph(newGraphid));
// Ids of all pattern elements; used below to exclude them from the untouched elements.
DataSet<GradoopId> patternVertexIds = graphPatterns.getVertices().map(new Id<>());
DataSet<GradoopId> patternEdgeIds = graphPatterns.getEdges().map(new Id<>());
// PHASE 1: Induced Subgraphs
// Associate each vertex to its graph id. The coGroup with the search graph's vertices
// keeps only pattern vertices that actually occur in the search graph (LeftSide after
// matching on id), then each survivor is paired with the ids of the graphs containing it.
DataSet<Tuple2<EPGMVertex, GradoopId>> patternVerticesWithGraphIDs = graphPatterns.getVertices().coGroup(searchGraph.getVertices()).where(new Id<>()).equalTo(new Id<>()).with(new LeftSide<>()).flatMap(new MapVertexToPairWithGraphId());
// Associate each gid in hypervertices.H to the merged vertices: one new vertex is
// created per pattern graph head (the vertex that will replace that pattern).
DataSet<Tuple2<EPGMVertex, GradoopId>> mergedVertices = graphPatterns.getGraphHeads().map(new CoGroupGraphHeadToVertex());
// PHASE 2: Recreating the vertices
// vi = search-graph vertices that are NOT part of any pattern (kept unchanged).
DataSet<EPGMVertex> vi = searchGraph.getVertices().filter(new IdNotInBroadcast<>()).withBroadcastSet(patternVertexIds, IdNotInBroadcast.IDS);
// idJoin maps every original vertex to its replacement id: pattern vertices are joined
// (via their graph id) to the merged vertex created for that pattern; untouched
// vertices are unioned in with a null replacement id.
DataSet<Tuple2<EPGMVertex, GradoopId>> idJoin = patternVerticesWithGraphIDs.coGroup(mergedVertices).where(new Value1Of2<>()).equalTo(new Value1Of2<>()).with(new CoGroupAssociateOldVerticesWithNewIds()).union(vi.map(new MapVerticesAsTuplesWithNullId()));
// Final vertex set: merged vertices whose pattern matched at least one search-graph
// vertex, plus the untouched vertices, all added to the new result graph.
DataSet<EPGMVertex> vToRet = mergedVertices.coGroup(patternVerticesWithGraphIDs).where(new Value1Of2<>()).equalTo(new Value1Of2<>()).with(new LeftSide<>()).map(new Value0Of2<>()).union(vi).map(new MapFunctionAddGraphElementToGraph2<>(newGraphid));
// PHASE 3: Recreating the edges
// Keep edges not belonging to any pattern, then redirect their source and target
// endpoints to the merged vertices via idJoin (two left outer joins: sources first,
// then targets). Edges duplicated by the rewiring get fresh ids before being added
// to the result graph.
DataSet<EPGMEdge> edges = searchGraph.getEdges().filter(new IdNotInBroadcast<>()).withBroadcastSet(patternEdgeIds, IdNotInBroadcast.IDS).leftOuterJoin(idJoin).where(new SourceId<>()).equalTo(new LeftElementId<>()).with(new FlatJoinSourceEdgeReference(true)).leftOuterJoin(idJoin).where(new TargetId<>()).equalTo(new LeftElementId<>()).with(new FlatJoinSourceEdgeReference(false)).groupBy(new Id<>()).reduceGroup(new AddNewIdToDuplicatedEdge()).map(new MapFunctionAddGraphElementToGraph2<>(newGraphid));
return searchGraph.getFactory().fromDataSets(gh, vToRet, edges);
}
Usage of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig:
from the class BaseFactory, method createGraphHeadDataSet.
/**
 * Creates a graph head dataset from a given collection.
 * Encapsulates the workaround for dataset creation from an empty collection.
 *
 * @param graphHeads graph heads
 * @return graph head dataset
 */
protected DataSet<EPGMGraphHead> createGraphHeadDataSet(Collection<EPGMGraphHead> graphHeads) {
ExecutionEnvironment environment = getConfig().getExecutionEnvironment();
if (graphHeads.isEmpty()) {
  // Flink cannot build a DataSet from an empty collection, so create a
  // single-element dataset and filter that element out again.
  return environment
    .fromElements(getGraphHeadFactory().createGraphHead())
    .filter(new False<>());
}
return environment.fromCollection(graphHeads);
}
Usage of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig:
from the class RollUp, method execute.
/**
 * Applies the rollUp operation on the given input graph.
 *
 * @param graph input graph
 * @return graphCollection containing all differently grouped graphs
 */
@Override
public GraphCollection execute(LogicalGraph graph) {
DataSet<EPGMGraphHead> graphHeads = null;
DataSet<EPGMVertex> vertices = null;
DataSet<EPGMEdge> edges = null;
// Run one grouping per combination of grouping keys and union all results.
for (List<String> keys : getGroupingKeyCombinations()) {
  LogicalGraph grouped = applyGrouping(graph, keys);
  // Record the grouping keys used for this result on its graph head.
  PropertyValue usedKeys = PropertyValue.create(String.join(",", keys));
  DataSet<EPGMGraphHead> head =
    grouped.getGraphHead().map(new SetProperty<>(getGraphPropertyKey(), usedKeys));
  if (graphHeads == null || vertices == null || edges == null) {
    // first iteration: initialize the result datasets
    graphHeads = head;
    vertices = grouped.getVertices();
    edges = grouped.getEdges();
  } else {
    // later iterations: union the grouped elements with the existing ones
    graphHeads = graphHeads.union(head);
    vertices = vertices.union(grouped.getVertices());
    edges = edges.union(grouped.getEdges());
  }
}
// With no grouping key combinations the datasets are still null; only then pay the
// overhead of creating an empty collection.
return (graphHeads == null || vertices == null || edges == null) ?
  graph.getCollectionFactory().createEmptyCollection() :
  graph.getCollectionFactory().fromDataSets(graphHeads, vertices, edges);
}
Usage of org.gradoop.common.model.impl.pojo.EPGMGraphHead in project gradoop by dbs-leipzig:
from the class AverageIncomingDegree, method execute.
/**
 * Computes the average incoming vertex degree of the graph and stores it as a
 * property on the graph head.
 *
 * @param graph input graph
 * @return the input graph with the average-incoming-degree property on its head
 */
@Override
public LogicalGraph execute(LogicalGraph graph) {
// Make sure the vertex count aggregate is available on the graph head.
graph = graph.aggregate(new VertexCount());
DataSet<EPGMGraphHead> singleHead = graph.getGraphHead().first(1);
// Sum all incoming degrees, attach the sum to the head, then divide by the
// vertex count to obtain the average.
DataSet<EPGMGraphHead> newGraphHead = new IncomingVertexDegrees()
  .execute(graph)
  .sum(1)
  .crossWithTiny(singleHead)
  .with(new AddSumDegreesToGraphHeadCrossFunction(
    SamplingEvaluationConstants.PROPERTY_KEY_SUM_DEGREES))
  .map(new CalculateAverageDegree(
    SamplingEvaluationConstants.PROPERTY_KEY_AVERAGE_INCOMING_DEGREE));
return graph.getFactory()
  .fromDataSets(newGraphHead, graph.getVertices(), graph.getEdges());
}
Aggregations