
Example 16 with SparkConf

Use of org.apache.spark.SparkConf in project deeplearning4j by deeplearning4j.

The class BaseSparkTest, method getContext.

/**
 * Lazily creates a local-mode JavaSparkContext for tests, reusing it across calls.
 *
 * @return the shared test JavaSparkContext
 */
public JavaSparkContext getContext() {
    if (sc != null)
        return sc;
    // set to test mode
    SparkConf sparkConf = new SparkConf()
            .setMaster("local[4]")
            .setAppName("sparktest")
            .set(Word2VecVariables.NUM_WORDS, String.valueOf(1));
    sc = new JavaSparkContext(sparkConf);
    return sc;
}
Also used: JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), SparkConf (org.apache.spark.SparkConf)
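For orientation, here is how a test might lean on this lazily created context. The subclass and assertions below are a hypothetical sketch, not part of the source; only getContext() comes from BaseSparkTest above.

import java.util.Arrays;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.Test;
import static org.junit.Assert.assertEquals;

// Hypothetical subclass; assumes BaseSparkTest exposes getContext() as shown above.
public class SharedContextExample extends BaseSparkTest {

    @Test
    public void countsASmallRdd() {
        JavaSparkContext context = getContext();
        // "local[4]" runs Spark in-process with 4 worker threads, so tiny
        // RDD operations are cheap enough for unit tests.
        long count = context.parallelize(Arrays.asList(1, 2, 3)).count();
        assertEquals(3, count);
    }
}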

Example 17 with SparkConf

Use of org.apache.spark.SparkConf in project deeplearning4j by deeplearning4j.

The class TextPipelineTest, method before.

@Before
public void before() throws Exception {
    conf = new SparkConf().setMaster("local[4]").setAppName("sparktest");
    // All the available options; these are the default values
    word2vec = new Word2Vec.Builder()
            .minWordFrequency(1)
            .setNGrams(1)
            .tokenizerFactory("org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory")
            .tokenPreprocessor("org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor")
            .stopWords(StopWords.getStopWords())
            .seed(42L)
            .negative(0)
            .useAdaGrad(false)
            .layerSize(100)
            .windowSize(5)
            .learningRate(0.025)
            .minLearningRate(0.0001)
            .iterations(1)
            .build();
    // Identical configuration, but without stop-word filtering
    word2vecNoStop = new Word2Vec.Builder()
            .minWordFrequency(1)
            .setNGrams(1)
            .tokenizerFactory("org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory")
            .tokenPreprocessor("org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor")
            .seed(42L)
            .negative(0)
            .useAdaGrad(false)
            .layerSize(100)
            .windowSize(5)
            .learningRate(0.025)
            .minLearningRate(0.0001)
            .iterations(1)
            .build();
    sentenceList = Arrays.asList("This is a strange strange world.", "Flowers are red.");
}
Also used: Word2Vec (org.deeplearning4j.spark.models.embeddings.word2vec.Word2Vec), SparkConf (org.apache.spark.SparkConf), Before (org.junit.Before)
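A test case consuming these fixtures can be very small. The test name and body below are a hypothetical sketch; only the conf and sentenceList fields match the setup above.

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.Test;
import static org.junit.Assert.assertEquals;

@Test
public void countsConfiguredSentences() {
    // conf and sentenceList are the fields initialised in before()
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        JavaRDD<String> corpus = sc.parallelize(sentenceList);
        assertEquals(2, corpus.count());  // two sentences were configured
    } finally {
        sc.stop();
    }
}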

Example 18 with SparkConf

Use of org.apache.spark.SparkConf in project deeplearning4j by deeplearning4j.

The class TestTrainingStatsCollection, method testStatsCollection.

@Test
public void testStatsCollection() throws Exception {
    int nWorkers = 4;
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[" + nWorkers + "]");
    sparkConf.setAppName("Test");
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    try {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
                .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build())
                .pretrain(false)
                .backprop(true)
                .build();
        int miniBatchSizePerWorker = 10;
        int averagingFrequency = 5;
        int numberOfAveragings = 3;
        int totalExamples = nWorkers * miniBatchSizePerWorker * averagingFrequency * numberOfAveragings;
        Nd4j.getRandom().setSeed(12345);
        List<DataSet> list = new ArrayList<>();
        for (int i = 0; i < totalExamples; i++) {
            INDArray f = Nd4j.rand(1, 10);
            INDArray l = Nd4j.rand(1, 10);
            DataSet ds = new DataSet(f, l);
            list.add(ds);
        }
        JavaRDD<DataSet> rdd = sc.parallelize(list);
        // repartition() returns a new RDD; reassign, or the call has no effect
        rdd = rdd.repartition(4);
        // Note: repartionData (sic) is the method's actual spelling in the DL4J API
        ParameterAveragingTrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(nWorkers, 1)
                .averagingFrequency(averagingFrequency)
                .batchSizePerWorker(miniBatchSizePerWorker)
                .saveUpdater(true)
                .workerPrefetchNumBatches(0)
                .repartionData(Repartition.Always)
                .build();
        SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, conf, tm);
        sparkNet.setCollectTrainingStats(true);
        sparkNet.fit(rdd);
        //Collect the expected keys:
        List<String> expectedStatNames = new ArrayList<>();
        Class<?>[] classes = new Class[] { CommonSparkTrainingStats.class, ParameterAveragingTrainingMasterStats.class, ParameterAveragingTrainingWorkerStats.class };
        String[] fieldNames = new String[] { "columnNames", "columnNames", "columnNames" };
        for (int i = 0; i < classes.length; i++) {
            Field field = classes[i].getDeclaredField(fieldNames[i]);
            field.setAccessible(true);
            Object f = field.get(null);
            Collection<String> c = (Collection<String>) f;
            expectedStatNames.addAll(c);
        }
        System.out.println(expectedStatNames);
        SparkTrainingStats stats = sparkNet.getSparkTrainingStats();
        Set<String> actualKeySet = stats.getKeySet();
        assertEquals(expectedStatNames.size(), actualKeySet.size());
        for (String s : stats.getKeySet()) {
            assertTrue(expectedStatNames.contains(s));
            assertNotNull(stats.getValue(s));
        }
        String statsAsString = stats.statsAsString();
        System.out.println(statsAsString);
        //One line per stat
        assertEquals(actualKeySet.size(), statsAsString.split("\n").length);
        //Go through nested stats
        //First: master stats
        assertTrue(stats instanceof ParameterAveragingTrainingMasterStats);
        ParameterAveragingTrainingMasterStats masterStats = (ParameterAveragingTrainingMasterStats) stats;
        List<EventStats> exportTimeStats = masterStats.getParameterAveragingMasterExportTimesMs();
        assertEquals(1, exportTimeStats.size());
        assertDurationGreaterZero(exportTimeStats);
        assertNonNullFields(exportTimeStats);
        assertExpectedNumberMachineIdsJvmIdsThreadIds(exportTimeStats, 1, 1, 1);
        List<EventStats> countRddTime = masterStats.getParameterAveragingMasterCountRddSizeTimesMs();
        //occurs once per fit
        assertEquals(1, countRddTime.size());
        assertDurationGreaterEqZero(countRddTime);
        assertNonNullFields(countRddTime);
        //should occur only in master once
        assertExpectedNumberMachineIdsJvmIdsThreadIds(countRddTime, 1, 1, 1);
        List<EventStats> broadcastCreateTime = masterStats.getParameterAveragingMasterBroadcastCreateTimesMs();
        assertEquals(numberOfAveragings, broadcastCreateTime.size());
        assertDurationGreaterEqZero(broadcastCreateTime);
        assertNonNullFields(broadcastCreateTime);
        //only 1 thread for master
        assertExpectedNumberMachineIdsJvmIdsThreadIds(broadcastCreateTime, 1, 1, 1);
        List<EventStats> fitTimes = masterStats.getParameterAveragingMasterFitTimesMs();
        //i.e., number of times fit(JavaRDD<DataSet>) was called
        assertEquals(1, fitTimes.size());
        assertDurationGreaterZero(fitTimes);
        assertNonNullFields(fitTimes);
        //only 1 thread for master
        assertExpectedNumberMachineIdsJvmIdsThreadIds(fitTimes, 1, 1, 1);
        List<EventStats> splitTimes = masterStats.getParameterAveragingMasterSplitTimesMs();
        //Splitting of the data set is executed once only (i.e., one fit(JavaRDD<DataSet>) call)
        assertEquals(1, splitTimes.size());
        assertDurationGreaterEqZero(splitTimes);
        assertNonNullFields(splitTimes);
        //only 1 thread for master
        assertExpectedNumberMachineIdsJvmIdsThreadIds(splitTimes, 1, 1, 1);
        List<EventStats> aggregateTimesMs = masterStats.getParamaterAveragingMasterAggregateTimesMs();
        assertEquals(numberOfAveragings, aggregateTimesMs.size());
        assertDurationGreaterEqZero(aggregateTimesMs);
        assertNonNullFields(aggregateTimesMs);
        //only 1 thread for master
        assertExpectedNumberMachineIdsJvmIdsThreadIds(aggregateTimesMs, 1, 1, 1);
        List<EventStats> processParamsTimesMs = masterStats.getParameterAveragingMasterProcessParamsUpdaterTimesMs();
        assertEquals(numberOfAveragings, processParamsTimesMs.size());
        assertDurationGreaterEqZero(processParamsTimesMs);
        assertNonNullFields(processParamsTimesMs);
        //only 1 thread for master
        assertExpectedNumberMachineIdsJvmIdsThreadIds(processParamsTimesMs, 1, 1, 1);
        List<EventStats> repartitionTimesMs = masterStats.getParameterAveragingMasterRepartitionTimesMs();
        assertEquals(numberOfAveragings, repartitionTimesMs.size());
        assertDurationGreaterEqZero(repartitionTimesMs);
        assertNonNullFields(repartitionTimesMs);
        //only 1 thread for master
        assertExpectedNumberMachineIdsJvmIdsThreadIds(repartitionTimesMs, 1, 1, 1);
        //Second: Common spark training stats
        SparkTrainingStats commonStats = masterStats.getNestedTrainingStats();
        assertNotNull(commonStats);
        assertTrue(commonStats instanceof CommonSparkTrainingStats);
        CommonSparkTrainingStats cStats = (CommonSparkTrainingStats) commonStats;
        List<EventStats> workerFlatMapTotalTimeMs = cStats.getWorkerFlatMapTotalTimeMs();
        assertEquals(numberOfAveragings * nWorkers, workerFlatMapTotalTimeMs.size());
        assertDurationGreaterZero(workerFlatMapTotalTimeMs);
        assertNonNullFields(workerFlatMapTotalTimeMs);
        assertExpectedNumberMachineIdsJvmIdsThreadIds(workerFlatMapTotalTimeMs, 1, 1, nWorkers);
        List<EventStats> workerFlatMapGetInitialModelTimeMs = cStats.getWorkerFlatMapGetInitialModelTimeMs();
        assertEquals(numberOfAveragings * nWorkers, workerFlatMapGetInitialModelTimeMs.size());
        assertDurationGreaterEqZero(workerFlatMapGetInitialModelTimeMs);
        assertNonNullFields(workerFlatMapGetInitialModelTimeMs);
        assertExpectedNumberMachineIdsJvmIdsThreadIds(workerFlatMapGetInitialModelTimeMs, 1, 1, nWorkers);
        List<EventStats> workerFlatMapDataSetGetTimesMs = cStats.getWorkerFlatMapDataSetGetTimesMs();
        int numMinibatchesProcessed = workerFlatMapDataSetGetTimesMs.size();
        //1 for every time we get a data set
        int expectedNumMinibatchesProcessed = numberOfAveragings * nWorkers * averagingFrequency;
        //Sometimes random split is just bad - some executors might miss out on getting the expected amount of data
        assertTrue(numMinibatchesProcessed >= expectedNumMinibatchesProcessed - 5);
        List<EventStats> workerFlatMapProcessMiniBatchTimesMs = cStats.getWorkerFlatMapProcessMiniBatchTimesMs();
        assertTrue(workerFlatMapProcessMiniBatchTimesMs.size() >= numberOfAveragings * nWorkers * averagingFrequency - 5);
        assertDurationGreaterEqZero(workerFlatMapProcessMiniBatchTimesMs);
        assertNonNullFields(workerFlatMapDataSetGetTimesMs);
        assertExpectedNumberMachineIdsJvmIdsThreadIds(workerFlatMapDataSetGetTimesMs, 1, 1, nWorkers);
        //Third: ParameterAveragingTrainingWorker stats
        SparkTrainingStats paramAvgStats = cStats.getNestedTrainingStats();
        assertNotNull(paramAvgStats);
        assertTrue(paramAvgStats instanceof ParameterAveragingTrainingWorkerStats);
        ParameterAveragingTrainingWorkerStats pStats = (ParameterAveragingTrainingWorkerStats) paramAvgStats;
        List<EventStats> parameterAveragingWorkerBroadcastGetValueTimeMs = pStats.getParameterAveragingWorkerBroadcastGetValueTimeMs();
        assertEquals(numberOfAveragings * nWorkers, parameterAveragingWorkerBroadcastGetValueTimeMs.size());
        assertDurationGreaterEqZero(parameterAveragingWorkerBroadcastGetValueTimeMs);
        assertNonNullFields(parameterAveragingWorkerBroadcastGetValueTimeMs);
        assertExpectedNumberMachineIdsJvmIdsThreadIds(parameterAveragingWorkerBroadcastGetValueTimeMs, 1, 1, nWorkers);
        List<EventStats> parameterAveragingWorkerInitTimeMs = pStats.getParameterAveragingWorkerInitTimeMs();
        assertEquals(numberOfAveragings * nWorkers, parameterAveragingWorkerInitTimeMs.size());
        assertDurationGreaterEqZero(parameterAveragingWorkerInitTimeMs);
        assertNonNullFields(parameterAveragingWorkerInitTimeMs);
        assertExpectedNumberMachineIdsJvmIdsThreadIds(parameterAveragingWorkerInitTimeMs, 1, 1, nWorkers);
        List<EventStats> parameterAveragingWorkerFitTimesMs = pStats.getParameterAveragingWorkerFitTimesMs();
        assertTrue(parameterAveragingWorkerFitTimesMs.size() >= numberOfAveragings * nWorkers * averagingFrequency - 5);
        assertDurationGreaterEqZero(parameterAveragingWorkerFitTimesMs);
        assertNonNullFields(parameterAveragingWorkerFitTimesMs);
        assertExpectedNumberMachineIdsJvmIdsThreadIds(parameterAveragingWorkerFitTimesMs, 1, 1, nWorkers);
        assertNull(pStats.getNestedTrainingStats());
        //Finally: try exporting stats
        String tempDir = System.getProperty("java.io.tmpdir");
        String outDir = FilenameUtils.concat(tempDir, "dl4j_testTrainingStatsCollection");
        stats.exportStatFiles(outDir, sc.sc());
        String htmlPlotsPath = FilenameUtils.concat(outDir, "AnalysisPlots.html");
        StatsUtils.exportStatsAsHtml(stats, htmlPlotsPath, sc);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        StatsUtils.exportStatsAsHTML(stats, baos);
        baos.close();
        byte[] bytes = baos.toByteArray();
        String str = new String(bytes, "UTF-8");
        // System.out.println(str);
    } finally {
        sc.stop();
    }
}
Also used: OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer), ParameterAveragingTrainingMasterStats (org.deeplearning4j.spark.impl.paramavg.stats.ParameterAveragingTrainingMasterStats), DataSet (org.nd4j.linalg.dataset.DataSet), CommonSparkTrainingStats (org.deeplearning4j.spark.api.stats.CommonSparkTrainingStats), SparkTrainingStats (org.deeplearning4j.spark.api.stats.SparkTrainingStats), Field (java.lang.reflect.Field), EventStats (org.deeplearning4j.spark.stats.EventStats), MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), SparkDl4jMultiLayer (org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), ParameterAveragingTrainingWorkerStats (org.deeplearning4j.spark.impl.paramavg.stats.ParameterAveragingTrainingWorkerStats), ByteArrayOutputStream (java.io.ByteArrayOutputStream), ParameterAveragingTrainingMaster (org.deeplearning4j.spark.impl.paramavg.ParameterAveragingTrainingMaster), INDArray (org.nd4j.linalg.api.ndarray.INDArray), SparkConf (org.apache.spark.SparkConf), Test (org.junit.Test)
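Stripped of its assertions, the workflow this test exercises is brief. A condensed sketch, reusing the sparkNet, rdd, and sc built above (the output directory is illustrative, not from the source):

// Enable stats collection before fit(); otherwise no stats are recorded
sparkNet.setCollectTrainingStats(true);
sparkNet.fit(rdd);
SparkTrainingStats stats = sparkNet.getSparkTrainingStats();

// Stats nest master -> common -> worker; keys and values are inspectable
for (String key : stats.getKeySet()) {
    System.out.println(key + " -> " + stats.getValue(key));
}

// Export the collected stats to files, plus an HTML chart page
stats.exportStatFiles("/tmp/dl4j_stats", sc.sc());
StatsUtils.exportStatsAsHtml(stats, "/tmp/dl4j_stats/AnalysisPlots.html", sc);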

Example 19 with SparkConf

Use of org.apache.spark.SparkConf in project mongo-hadoop by mongodb.

The class Enron, method run.

public void run() {
    JavaSparkContext sc = new JavaSparkContext(new SparkConf());
    // Set configuration options for the MongoDB Hadoop Connector.
    Configuration mongodbConfig = new Configuration();
    // MongoInputFormat allows us to read from a live MongoDB instance.
    // We could also use BSONFileInputFormat to read BSON snapshots.
    mongodbConfig.set("mongo.job.input.format", "com.mongodb.hadoop.MongoInputFormat");
    // MongoDB connection string naming a collection to use.
    // If using BSON, use "mapred.input.dir" to configure the directory
    // where BSON files are located instead.
    mongodbConfig.set("mongo.input.uri", "mongodb://localhost:27017/enron_mail.messages");
    // Create an RDD backed by the MongoDB collection.
    JavaPairRDD<Object, BSONObject> documents = sc.newAPIHadoopRDD(
            mongodbConfig,          // Configuration
            MongoInputFormat.class, // InputFormat: read from a live cluster
            Object.class,           // Key class
            BSONObject.class);      // Value class
    JavaRDD<String> edges = documents.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, String>() {

        @Override
        public Iterable<String> call(final Tuple2<Object, BSONObject> t) throws Exception {
            BSONObject header = (BSONObject) t._2().get("headers");
            String to = (String) header.get("To");
            String from = (String) header.get("From");
            // each tuple in the set is an individual from|to pair
            //JavaPairRDD<String, Integer> tuples = new JavaPairRDD<String, Integer>();
            List<String> tuples = new ArrayList<String>();
            if (to != null && !to.isEmpty()) {
                for (String recipient : to.split(",")) {
                    String s = recipient.trim();
                    if (s.length() > 0) {
                        tuples.add(from + "|" + s);
                    }
                }
            }
            return tuples;
        }
    });
    JavaPairRDD<String, Integer> pairs = edges.mapToPair(new PairFunction<String, String, Integer>() {

        public Tuple2<String, Integer> call(final String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });
    JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {

        public Integer call(final Integer a, final Integer b) {
            return a + b;
        }
    });
    // Create a separate Configuration for saving data back to MongoDB.
    Configuration outputConfig = new Configuration();
    outputConfig.set("mongo.output.uri", "mongodb://localhost:27017/enron_mail.message_pairs");
    // Save this RDD as a Hadoop "file".
    // The path argument is unused; all documents will go to 'mongo.output.uri'.
    counts.saveAsNewAPIHadoopFile("file:///this-is-completely-unused", Object.class, BSONObject.class, MongoOutputFormat.class, outputConfig);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), BSONObject (org.bson.BSONObject), Tuple2 (scala.Tuple2), ArrayList (java.util.ArrayList), List (java.util.List), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), SparkConf (org.apache.spark.SparkConf)
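On Java 8 the three anonymous classes collapse into lambdas. An equivalent sketch of the same pipeline, assuming the Spark 1.x Java API used above (where FlatMapFunction.call returns an Iterable):

// documents is the JavaPairRDD<Object, BSONObject> created above
JavaRDD<String> edges = documents.flatMap(t -> {
    BSONObject header = (BSONObject) t._2().get("headers");
    String from = (String) header.get("From");
    String to = (String) header.get("To");
    List<String> tuples = new ArrayList<String>();
    if (to != null && !to.isEmpty()) {
        for (String recipient : to.split(",")) {
            String s = recipient.trim();
            if (s.length() > 0) {
                tuples.add(from + "|" + s);  // one "from|to" edge per recipient
            }
        }
    }
    return tuples;
});
JavaPairRDD<String, Integer> pairs = edges.mapToPair(s -> new Tuple2<>(s, 1));
JavaPairRDD<String, Integer> counts = pairs.reduceByKey((a, b) -> a + b);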

Example 20 with SparkConf

Use of org.apache.spark.SparkConf in project Gaffer by gchq.

The class GetJavaRDDOfAllElementsHandlerTest, method checkGetAllElementsInJavaRDDWithVisibility.

@Test
public void checkGetAllElementsInJavaRDDWithVisibility() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchemaWithVisibility.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 1; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        entity.putProperty("visibility", "public");
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        edge1.putProperty("visibility", "private");
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        edge2.putProperty("visibility", "public");
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User("user", Collections.singleton("public"));
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Create user with just public auth, and user with both private and public
    final Set<String> publicNotPrivate = new HashSet<>();
    publicNotPrivate.add("public");
    final User userWithPublicNotPrivate = new User("user1", publicNotPrivate);
    final Set<String> privateAuth = new HashSet<>();
    privateAuth.add("public");
    privateAuth.add("private");
    final User userWithPrivate = new User("user2", privateAuth);
    // Calculate correct results for 2 users
    final Set<Element> expectedElementsPublicNotPrivate = new HashSet<>();
    final Set<Element> expectedElementsPrivate = new HashSet<>();
    for (final Element element : elements) {
        expectedElementsPrivate.add(element);
        if (element.getProperty("visibility").equals("public")) {
            expectedElementsPublicNotPrivate.add(element);
        }
    }
    // Check get correct edges for user with just public
    GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sparkContext).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    JavaRDD<Element> rdd = graph1.execute(rddQuery, userWithPublicNotPrivate);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(expectedElementsPublicNotPrivate, results);
    // Check get correct edges for user with both private and public
    rddQuery = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sparkContext).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, userWithPrivate);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    results.addAll(rdd.collect());
    assertEquals(expectedElementsPrivate, results);
    sparkContext.stop();
}
Also used: AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements), Entity (uk.gov.gchq.gaffer.data.element.Entity), User (uk.gov.gchq.gaffer.user.User), Configuration (org.apache.hadoop.conf.Configuration), DataOutputStream (java.io.DataOutputStream), Element (uk.gov.gchq.gaffer.data.element.Element), ArrayList (java.util.ArrayList), GetJavaRDDOfAllElements (uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements), ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream), Graph (uk.gov.gchq.gaffer.graph.Graph), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), Edge (uk.gov.gchq.gaffer.data.element.Edge), SparkConf (org.apache.spark.SparkConf), HashSet (java.util.HashSet), Test (org.junit.Test)
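The Configuration-to-string trick above (write the Hadoop Configuration through a DataOutputStream, then decode the bytes as UTF-8) is the part most worth reusing. A minimal round-trip sketch; the method names are ours, and it assumes, as this test does, that the configuration's serialised bytes survive the UTF-8 round trip:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;

// Configuration implements Hadoop's Writable, so write()/readFields()
// perform the actual (de)serialisation.
static String serialiseConfiguration(final Configuration conf) throws IOException {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    conf.write(new DataOutputStream(baos));
    return new String(baos.toByteArray(), "UTF-8");
}

static Configuration deserialiseConfiguration(final String s) throws IOException {
    final Configuration conf = new Configuration();
    conf.readFields(new DataInputStream(new ByteArrayInputStream(s.getBytes("UTF-8"))));
    return conf;
}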

Aggregations

SparkConf (org.apache.spark.SparkConf): 83 usages
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 46 usages
Test (org.junit.Test): 21 usages
ArrayList (java.util.ArrayList): 20 usages
Configuration (org.apache.hadoop.conf.Configuration): 20 usages
Tuple2 (scala.Tuple2): 15 usages
Graph (uk.gov.gchq.gaffer.graph.Graph): 13 usages
DataOutputStream (java.io.DataOutputStream): 11 usages
File (java.io.File): 10 usages
HashSet (java.util.HashSet): 10 usages
ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream): 10 usages
Edge (uk.gov.gchq.gaffer.data.element.Edge): 10 usages
Element (uk.gov.gchq.gaffer.data.element.Element): 10 usages
Entity (uk.gov.gchq.gaffer.data.element.Entity): 10 usages
User (uk.gov.gchq.gaffer.user.User): 10 usages
Ignore (org.junit.Ignore): 6 usages
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 5 usages
JavaHBaseContext (org.apache.hadoop.hbase.spark.JavaHBaseContext): 5 usages
Test (org.testng.annotations.Test): 5 usages
AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements): 5 usages