Example 21 with Graph

Use of uk.gov.gchq.gaffer.graph.Graph in project Gaffer by gchq.

The class ImportKeyValuePairRDDToAccumuloHandlerTest, method checkImportRDDOfElements.

@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeSchema.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("tests")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath() + "load";
    final String failurePath = this.getClass().getResource("/").getPath() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
    final ElementConverterFunction func = new ElementConverterFunction(sparkContext.broadcast(
            new ByteEntityAccumuloElementConverter(graph1.getSchema()), ACCUMULO_ELEMENT_CONVERTER_CLASS_TAG));
    final RDD<Tuple2<Key, Value>> elementRDD = sparkContext.parallelize(elements, 1, ELEMENT_CLASS_TAG)
            .flatMap(func, TUPLE2_CLASS_TAG);
    final ImportKeyValuePairRDDToAccumulo addRdd = new ImportKeyValuePairRDDToAccumulo.Builder()
            .input(elementRDD)
            .outputPath(outputPath)
            .failurePath(failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .sparkContext(sparkContext)
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Element(uk.gov.gchq.gaffer.data.element.Element) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) ImportKeyValuePairRDDToAccumulo(uk.gov.gchq.gaffer.sparkaccumulo.operation.scalardd.ImportKeyValuePairRDDToAccumulo) HashSet(java.util.HashSet) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) Graph(uk.gov.gchq.gaffer.graph.Graph) SparkContext(org.apache.spark.SparkContext) Tuple2(scala.Tuple2) ArrayBuffer(scala.collection.mutable.ArrayBuffer) ByteEntityAccumuloElementConverter(uk.gov.gchq.gaffer.accumulostore.key.core.impl.byteEntity.ByteEntityAccumuloElementConverter) ElementConverterFunction(uk.gov.gchq.gaffer.sparkaccumulo.operation.utils.scala.ElementConverterFunction) Edge(uk.gov.gchq.gaffer.data.element.Edge) SparkConf(org.apache.spark.SparkConf) File(java.io.File) Test(org.junit.Test)
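
The Configuration-to-string step in this test is worth a closer look: Hadoop's Configuration implements Writable, so it can serialise itself to any DataOutput, and the resulting bytes are handed to the GetRDDOfAllElements operation as a string option. A minimal, standalone round-trip sketch of the same idea (not Gaffer-specific; the "example.key" property is hypothetical):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;

public class ConfigurationRoundTrip {
    public static void main(final String[] args) throws IOException {
        final Configuration conf = new Configuration();
        // Hypothetical property, just to demonstrate the round trip.
        conf.set("example.key", "example.value");

        // Serialise: Configuration writes itself to a DataOutput.
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        conf.write(new DataOutputStream(baos));
        final String configurationString = new String(baos.toByteArray(), StandardCharsets.UTF_8);

        // Deserialise: read the bytes back into a fresh Configuration.
        final Configuration restored = new Configuration();
        restored.readFields(new DataInputStream(
                new ByteArrayInputStream(configurationString.getBytes(StandardCharsets.UTF_8))));
        System.out.println(restored.get("example.key"));
    }
}

Note that round-tripping the raw bytes through a UTF-8 String, as the test does, works for typical text-valued configurations but is not binary-safe in general; a Base64 encoding would be the more defensive choice.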

Example 22 with Graph

Use of uk.gov.gchq.gaffer.graph.Graph in project Gaffer by gchq.

The class ImportRDDOfElementsHandlerTest, method checkImportRDDOfElements.

@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("tests")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath() + "load";
    final String failurePath = this.getClass().getResource("/").getPath() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
    final RDD<Element> elementRDD = sparkContext.parallelize(elements, 8, ELEMENT_CLASS_TAG);
    final ImportRDDOfElements addRdd = new ImportRDDOfElements.Builder()
            .sparkContext(sparkContext)
            .input(elementRDD)
            .option("outputPath", outputPath)
            .option("failurePath", failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder().sparkContext(sparkContext).option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString).build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Element(uk.gov.gchq.gaffer.data.element.Element) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) Graph(uk.gov.gchq.gaffer.graph.Graph) SparkContext(org.apache.spark.SparkContext) ImportRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.ImportRDDOfElements) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) ArrayBuffer(scala.collection.mutable.ArrayBuffer) Edge(uk.gov.gchq.gaffer.data.element.Edge) SparkConf(org.apache.spark.SparkConf) File(java.io.File) HashSet(java.util.HashSet) Test(org.junit.Test)
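
Both tests reference ClassTag constants (ELEMENT_CLASS_TAG, TUPLE2_CLASS_TAG, ACCUMULO_ELEMENT_CONVERTER_CLASS_TAG) without showing their definitions; Spark's Scala API needs a ClassTag whenever an RDD is created from Java. A plausible way to declare two of them, assuming the test superclass does something along these lines:

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;

import scala.Tuple2;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;

import uk.gov.gchq.gaffer.data.element.Element;

public final class ClassTags {
    // ClassTag for the Element RDD passed to parallelize(...).
    public static final ClassTag<Element> ELEMENT_CLASS_TAG =
            ClassTag$.MODULE$.apply(Element.class);

    // ClassTag for the Tuple2<Key, Value> RDD produced by flatMap(...).
    // The raw-type cast is unavoidable because Tuple2's type parameters are erased.
    @SuppressWarnings({"unchecked", "rawtypes"})
    public static final ClassTag<Tuple2<Key, Value>> TUPLE2_CLASS_TAG =
            (ClassTag) ClassTag$.MODULE$.apply(Tuple2.class);

    private ClassTags() {
    }
}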

Example 23 with Graph

Use of uk.gov.gchq.gaffer.graph.Graph in project Gaffer by gchq.

The class GetJavaRDDOfElementsExample, method runExamples.

@Override
public void runExamples() {
    // Logging has to be toggled explicitly, because Spark produces some logs
    // even when the log level is set to off.
    ROOT_LOGGER.setLevel(Level.OFF);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("GetJavaRDDOfElementsExample")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sc = new JavaSparkContext(sparkConf);
    sc.setLogLevel("OFF");
    final Graph graph = getGraph();
    try {
        getJavaRddOfElements(sc, graph);
        getJavaRddOfElementsReturningEdgesOnly(sc, graph);
    } catch (final OperationException e) {
        throw new RuntimeException(e);
    }
    sc.stop();
    ROOT_LOGGER.setLevel(Level.INFO);
}
Also used : Graph(uk.gov.gchq.gaffer.graph.Graph) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkConf(org.apache.spark.SparkConf) OperationException(uk.gov.gchq.gaffer.operation.OperationException)
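
ROOT_LOGGER is not defined in this snippet. With the log4j 1.x API used by Gaffer's examples it would plausibly be the root logger; a sketch, assuming the shared example base class declares it along these lines:

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

public class LoggingToggleDemo {
    // Plausible definition of the constant toggled above.
    private static final Logger ROOT_LOGGER = Logger.getRootLogger();

    public static void main(final String[] args) {
        ROOT_LOGGER.setLevel(Level.OFF);
        // ... Spark work would run here, free of log noise ...
        ROOT_LOGGER.setLevel(Level.INFO);
    }
}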

Example 24 with Graph

Use of uk.gov.gchq.gaffer.graph.Graph in project Gaffer by gchq.

The class GraphConfigurationServiceTest, method setup.

@Before
public void setup() {
    final Store store = mock(Store.class);
    final Schema schema = mock(Schema.class);
    final Set<StoreTrait> traits = new HashSet<>(Arrays.asList(
            STORE_AGGREGATION, PRE_AGGREGATION_FILTERING, POST_TRANSFORMATION_FILTERING,
            POST_AGGREGATION_FILTERING, TRANSFORMATION, STORE_VALIDATION));
    given(store.getSchema()).willReturn(schema);
    final Graph graph = new Graph.Builder().store(store).build();
    final Set<Class<? extends Operation>> operations = new HashSet<>();
    operations.add(AddElements.class);
    given(graphFactory.getGraph()).willReturn(graph);
    given(graph.getSupportedOperations()).willReturn(operations);
    given(graph.isSupported(AddElements.class)).willReturn(true);
    given(userFactory.createUser()).willReturn(new User());
    given(graph.getStoreTraits()).willReturn(traits);
}
Also used : Graph(uk.gov.gchq.gaffer.graph.Graph) User(uk.gov.gchq.gaffer.user.User) StoreTrait(uk.gov.gchq.gaffer.store.StoreTrait) Schema(uk.gov.gchq.gaffer.store.schema.Schema) Store(uk.gov.gchq.gaffer.store.Store) Operation(uk.gov.gchq.gaffer.operation.Operation) HashSet(java.util.HashSet) Before(org.junit.Before)
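
One subtlety in this setup: graph is a real Graph built with the Builder, not a Mockito mock, yet it is stubbed with given(...). This appears to work only because the Graph methods involved delegate straight to the mocked Store, so the invocation Mockito records is the one on the mock. A minimal, self-contained illustration of this stubbing-through-delegation pattern (the Service and Backend types are hypothetical):

import static org.mockito.BDDMockito.given;
import static org.mockito.Mockito.mock;

public class DelegationStubDemo {
    // Hypothetical collaborator that will be mocked.
    static class Backend {
        String fetch() {
            return "real";
        }
    }

    // Hypothetical wrapper that delegates directly to the backend,
    // much as Graph delegates getSupportedOperations() to its Store.
    static class Service {
        private final Backend backend;

        Service(final Backend backend) {
            this.backend = backend;
        }

        String fetch() {
            return backend.fetch();
        }
    }

    public static void main(final String[] args) {
        final Backend backend = mock(Backend.class);
        final Service service = new Service(backend);
        // given(service.fetch()) runs the real delegating method; the last mock
        // invocation Mockito saw is backend.fetch(), so that is what gets stubbed.
        given(service.fetch()).willReturn("stubbed");
        System.out.println(service.fetch());
    }
}

The pattern is fragile: it breaks as soon as the wrapper does anything besides delegate (caching, validation, defensive copies), so stubbing the Store directly is usually the safer choice.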

Example 25 with Graph

Use of uk.gov.gchq.gaffer.graph.Graph in project Gaffer by gchq.

The class LoadAndQuery12, method run.

public Iterable<Entity> run() throws OperationException {
    // [user] Create a user
    // ---------------------------------------------------------
    final User user = new User("user01");
    // ---------------------------------------------------------
    // [graph] create a graph using our schema and store properties
    // ---------------------------------------------------------
    final Graph graph = new Graph.Builder().addSchemas(getSchemas()).storeProperties(getStoreProperties()).build();
    // ---------------------------------------------------------
    // [add] add the edges to the graph
    // ---------------------------------------------------------
    final Set<String> dummyData = Collections.singleton("");
    final OperationChain addOpChain = new OperationChain.Builder()
            .first(new GenerateElements.Builder<String>()
                    .generator(new DataGenerator12())
                    .objects(dummyData)
                    .build())
            .then(new AddElements())
            .build();
    graph.execute(addOpChain, user);
    // ---------------------------------------------------------
    log("Added the edge A-B 1000 times each time with a ReservoirItemsUnion<String> containing a random string." + " Also added 500 edges X-Y0, X-Y1, ..., X-Y499 each and for each an Entity on X with a" + " ReservoirItemsUnion<String> containing the destination node.");
    // [get red edge] Get the red edge
    // ---------------------------------------------------------
    final GetAllEdges getAllEdges = new GetAllEdges.Builder().view(new View.Builder().edge("red").build()).build();
    final Iterable<Edge> allEdges = graph.execute(getAllEdges, user);
    // ---------------------------------------------------------
    log("\nThe red edge A-B:");
    for (final Edge edge : allEdges) {
        log("GET_A-B_EDGE_RESULT", edge.toString());
    }
    // [get strings sample from the red edge] Get the edge A-B and print out the sample of strings
    // ---------------------------------------------------------
    final GetEdges<EdgeSeed> query = new GetEdges.Builder<EdgeSeed>().addSeed(new EdgeSeed("A", "B", false)).build();
    final Iterable<Edge> edges = graph.execute(query, user);
    final Edge edge = edges.iterator().next();
    final ReservoirItemsSketch<String> stringsSketch = ((ReservoirItemsUnion<String>) edge.getProperty("stringsSample")).getResult();
    final String[] samples = stringsSketch.getSamples();
    final StringBuilder sb = new StringBuilder("10 samples: ");
    for (int i = 0; i < 10 && i < samples.length; i++) {
        if (i > 0) {
            sb.append(", ");
        }
        sb.append(samples[i]);
    }
    // ---------------------------------------------------------
    log("\nEdge A-B with a sample of the strings");
    log("GET_SAMPLE_FOR_RED_EDGE", sb.toString());
    // [get sample from the blue entity] Get the entity Y and print a sample of the neighbours
    // ---------------------------------------------------------
    final GetEntities<EntitySeed> query2 = new GetEntities.Builder<EntitySeed>().addSeed(new EntitySeed("X")).build();
    final Iterable<Entity> entities = graph.execute(query2, user);
    final Entity entity = entities.iterator().next();
    final ReservoirItemsSketch<String> neighboursSketch = ((ReservoirItemsUnion<String>) entity.getProperty("neighboursSample")).getResult();
    final String[] neighboursSample = neighboursSketch.getSamples();
    sb.setLength(0);
    sb.append("10 samples: ");
    for (int i = 0; i < 10 && i < neighboursSample.length; i++) {
        if (i > 0) {
            sb.append(", ");
        }
        sb.append(neighboursSample[i]);
    }
    // ---------------------------------------------------------
    log("\nEntity for vertex X with a sample of its neighbouring vertices");
    log("GET_SAMPLES_FOR_X_RESULT", sb.toString());
    return null;
}
Also used : AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) DataGenerator12(uk.gov.gchq.gaffer.example.gettingstarted.generator.DataGenerator12) Graph(uk.gov.gchq.gaffer.graph.Graph) GetAllEdges(uk.gov.gchq.gaffer.operation.impl.get.GetAllEdges) OperationChain(uk.gov.gchq.gaffer.operation.OperationChain) EdgeSeed(uk.gov.gchq.gaffer.operation.data.EdgeSeed) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) Edge(uk.gov.gchq.gaffer.data.element.Edge) ReservoirItemsUnion(com.yahoo.sketches.sampling.ReservoirItemsUnion)
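
The stringsSample and neighboursSample properties in this example are backed by the DataSketches reservoir-sampling library, the same types used above. A standalone sketch of the underlying mechanics (the reservoir size of 10 is an assumption; in Gaffer it would come from the schema's property definition):

import com.yahoo.sketches.sampling.ReservoirItemsSketch;
import com.yahoo.sketches.sampling.ReservoirItemsUnion;

public class ReservoirSampleDemo {
    public static void main(final String[] args) {
        // Assumed reservoir size: at most 10 items are kept, however many arrive.
        final int reservoirSize = 10;
        final ReservoirItemsSketch<String> sketch = ReservoirItemsSketch.newInstance(reservoirSize);
        for (int i = 0; i < 1000; i++) {
            sketch.update("item-" + i);
        }

        // Unions let sketches from separate updates (or separate elements) be merged,
        // which is what Gaffer's aggregation does each time the same edge is re-added.
        final ReservoirItemsUnion<String> union = ReservoirItemsUnion.newInstance(reservoirSize);
        union.update(sketch);

        // getResult() yields the merged sketch; getSamples() a uniform random sample.
        final String[] samples = union.getResult().getSamples();
        System.out.println(samples.length + " samples, e.g. " + samples[0]);
    }
}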

Aggregations

Graph (uk.gov.gchq.gaffer.graph.Graph) 72
User (uk.gov.gchq.gaffer.user.User) 52
Test (org.junit.Test) 45
AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements) 36
Edge (uk.gov.gchq.gaffer.data.element.Edge) 29
Entity (uk.gov.gchq.gaffer.data.element.Entity) 22
EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed) 22
View (uk.gov.gchq.gaffer.data.elementdefinition.view.View) 20
HashSet (java.util.HashSet) 18
Element (uk.gov.gchq.gaffer.data.element.Element) 16
OperationChain (uk.gov.gchq.gaffer.operation.OperationChain) 16
ArrayList (java.util.ArrayList) 15
SparkConf (org.apache.spark.SparkConf) 13
JSONSerialiser (uk.gov.gchq.gaffer.jsonserialisation.JSONSerialiser) 11
DataOutputStream (java.io.DataOutputStream) 10
ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream) 10
Configuration (org.apache.hadoop.conf.Configuration) 10
CloseableIterable (uk.gov.gchq.gaffer.commonutil.iterable.CloseableIterable) 10
SQLContext (org.apache.spark.sql.SQLContext) 9
ViewElementDefinition (uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition) 9