Example 6 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.

In class AddElementsFromHdfsIT, the method addElementsFromHdfs:

private void addElementsFromHdfs(Class<? extends AccumuloKeyPackage> keyPackageClass) throws Exception {
    // Given
    createInputFile();
    final Graph graph = createGraph(keyPackageClass);
    // When
    graph.execute(new AddElementsFromHdfs.Builder()
            .inputPaths(Collections.singletonList(inputDir))
            .outputPath(outputDir)
            .failurePath(failureDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .jobInitialiser(new TextJobInitialiser())
            .option(AccumuloStoreConstants.OPERATION_HDFS_USE_PROVIDED_SPLITS_FILE, "false")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, splitsFile)
            .build(), new User());
    // Then
    final CloseableIterable<Element> elements = graph.execute(new GetAllElements<>(), new User());
    final List<Element> elementList = Lists.newArrayList(elements);
    assertEquals(NUM_ENTITIES, elementList.size());
    for (int i = 0; i < NUM_ENTITIES; i++) {
        assertEquals(TestGroups.ENTITY, elementList.get(i).getGroup());
        assertEquals(VERTEX_ID_PREFIX + i, ((Entity) elementList.get(i)).getVertex());
    }
}
Also used : AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) Graph(uk.gov.gchq.gaffer.graph.Graph) User(uk.gov.gchq.gaffer.user.User) Element(uk.gov.gchq.gaffer.data.element.Element) TextJobInitialiser(uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser)
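
Every example in this set references a TextMapperGeneratorImpl that is defined elsewhere. A minimal sketch of such a mapper generator, assuming Gaffer's TextMapperGenerator base class and a one-entity-per-line input format; the group name and the nested generator class are illustrative:

import uk.gov.gchq.gaffer.data.element.Element;
import uk.gov.gchq.gaffer.data.element.Entity;
import uk.gov.gchq.gaffer.data.generator.OneToOneElementGenerator;
import uk.gov.gchq.gaffer.hdfs.operation.mapper.generator.TextMapperGenerator;

public class TextMapperGeneratorImpl extends TextMapperGenerator {
    public TextMapperGeneratorImpl() {
        // Delegate element creation to a simple line-based generator
        super(new ExampleGenerator());
    }

    public static final class ExampleGenerator implements OneToOneElementGenerator<String> {
        @Override
        public Element _apply(final String line) {
            // Assumption: each input line is a single vertex id for the "BasicEntity" group
            return new Entity("BasicEntity", line.trim());
        }
    }
}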

Example 7 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project gaffer-doc by gchq.

In class AddElementsFromHdfsExample, the method addElementsFromHdfsMainMethod:

@SuppressFBWarnings("REC_CATCH_EXCEPTION")
private void addElementsFromHdfsMainMethod() {
    try {
        // ---------------------------------------------------------
        if (5 != args.length) {
            System.err.println("Usage: hadoop jar custom-hdfs-import-<version>-shaded.jar <inputPath> <outputPath> <failurePath> <schemaPath> <storePropertiesPath>");
            System.exit(1);
        }
        final String inputPath = args[0];
        final String outputPath = args[1];
        final String failurePath = args[2];
        final String schemaPath = args[3];
        final String storePropertiesPath = args[4];
        final Graph graph = new Graph.Builder()
                .storeProperties(storePropertiesPath)
                .addSchemas(Paths.get(schemaPath))
                .build();
        final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
                .addInputMapperPair(inputPath, TextMapperGeneratorImpl.class.getName())
                .outputPath(outputPath)
                .failurePath(failurePath)
                .splitsFilePath("/tmp/splits")
                .workingPath("/tmp/workingDir")
                .useProvidedSplits(false)
                .jobInitialiser(new TextJobInitialiser())
                .minReducers(10)
                .maxReducers(100)
                .build();
        graph.execute(operation, new User());
    // ---------------------------------------------------------
    } catch (final Exception e) {
    // ignore error
    }
    showJavaExample("Example content for a main method that takes 5 arguments and runs an " + AddElementsFromHdfs.class.getSimpleName());
}
Also used : AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) Graph(uk.gov.gchq.gaffer.graph.Graph) User(uk.gov.gchq.gaffer.user.User) TextJobInitialiser(uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser) OperationException(uk.gov.gchq.gaffer.operation.OperationException) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)
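
A hypothetical standalone driver assembled from the snippet above, showing how the five arguments map onto the graph and the operation; the class name is illustrative:

import java.nio.file.Paths;

import uk.gov.gchq.gaffer.graph.Graph;
import uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs;
import uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser;
import uk.gov.gchq.gaffer.user.User;

public class CustomHdfsImport {
    public static void main(final String[] args) throws Exception {
        if (5 != args.length) {
            System.err.println("Usage: hadoop jar custom-hdfs-import-<version>-shaded.jar "
                    + "<inputPath> <outputPath> <failurePath> <schemaPath> <storePropertiesPath>");
            System.exit(1);
        }
        // Build the graph from the supplied schema and store properties
        final Graph graph = new Graph.Builder()
                .storeProperties(args[4])
                .addSchemas(Paths.get(args[3]))
                .build();
        // Wire the three HDFS paths into the operation
        final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
                .addInputMapperPair(args[0], TextMapperGeneratorImpl.class.getName())
                .outputPath(args[1])
                .failurePath(args[2])
                .jobInitialiser(new TextJobInitialiser())
                .useProvidedSplits(false)
                .build();
        graph.execute(operation, new User());
    }
}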

Example 8 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project gaffer-doc by gchq.

In class AddElementsFromHdfsExample, the method addElementsFromHdfsWithMultipleInput:

@SuppressFBWarnings("DLS_DEAD_LOCAL_STORE")
public void addElementsFromHdfsWithMultipleInput() {
    // ---------------------------------------------------------
    final Map<String, String> inputMapperMap = new HashMap<>();
    inputMapperMap.put("/path/to/first/inputFileOrFolder", TextMapperGeneratorImpl.class.getName());
    inputMapperMap.put("/path/to/second/inputFileOrFolder", TextMapperGeneratorImpl.class.getName());
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .inputMapperPairs(inputMapperMap)
            .addInputMapperPair("/path/to/third/inputFileOrFolder", TextMapperGeneratorImpl.class.getName())
            .outputPath("/path/to/output/folder")
            .failurePath("/path/to/failure/folder")
            .splitsFilePath("/path/to/splits/file")
            .workingPath("/tmp/workingDir")
            .useProvidedSplits(false)
            .jobInitialiser(new TextJobInitialiser())
            .minReducers(10)
            .maxReducers(100)
            .build();
    // ---------------------------------------------------------
    showJavaExample(null);
}
Also used : AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) HashMap(java.util.HashMap) TextJobInitialiser(uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser) TextMapperGeneratorImpl(uk.gov.gchq.gaffer.doc.operation.generator.TextMapperGeneratorImpl) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)
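
Because each input path is paired with its own mapper generator class name, a single job can ingest heterogeneous input formats. A sketch, where JsonMapperGeneratorImpl is a hypothetical second generator class (only TextMapperGeneratorImpl appears in these examples):

final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
        // Text lines handled by the generator shown in the examples above
        .addInputMapperPair("/path/to/text/input", TextMapperGeneratorImpl.class.getName())
        // Hypothetical generator for a different line format
        .addInputMapperPair("/path/to/json/input", JsonMapperGeneratorImpl.class.getName())
        .outputPath("/path/to/output/folder")
        .failurePath("/path/to/failure/folder")
        .jobInitialiser(new TextJobInitialiser())
        .build();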

Example 9 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.

In class AccumuloAddElementsFromHdfsJobFactoryTest, the method shouldThrowExceptionWhenMaxReducersSetOutsideOfRange:

@Test
public void shouldThrowExceptionWhenMaxReducersSetOutsideOfRange() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();
    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder().inputPath(splitsFile).build();
    store.execute(splitTable, new Context(new User()));
    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);
    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(100)
            .maxReducers(101)
            .splitsFilePath("target/data/splits.txt")
            .build();
    // Then
    try {
        factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
        fail("Exception expected");
    } catch (IllegalArgumentException e) {
        assertTrue(e.getMessage().contains("not a valid range"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Context(uk.gov.gchq.gaffer.store.Context) AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) User(uk.gov.gchq.gaffer.user.User) FileWriter(java.io.FileWriter) SplitStoreFromFile(uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile) BufferedWriter(java.io.BufferedWriter) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.jupiter.api.Test) AbstractJobFactoryTest(uk.gov.gchq.gaffer.hdfs.operation.hander.job.factory.AbstractJobFactoryTest)
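
The imports show the test already uses JUnit Jupiter, so the try/fail/catch pattern could equally be written with assertThrows; a sketch (assumes the usual org.junit.jupiter.api.Assertions static imports):

    final IllegalArgumentException e = assertThrows(IllegalArgumentException.class,
            () -> factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store));
    assertTrue(e.getMessage().contains("not a valid range"));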

Example 10 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.

In class AccumuloAddElementsFromHdfsJobFactoryTest, the method shouldSetNumberOfReducersBetweenMinAndMaxSpecified:

@Test
public void shouldSetNumberOfReducersBetweenMinAndMaxSpecified() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();
    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder().inputPath(splitsFile).build();
    store.execute(splitTable, new Context(new User()));
    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);
    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(10)
            .maxReducers(20)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 10);
    assertTrue(job.getNumReduceTasks() <= 20);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(100)
            .maxReducers(200)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 100);
    assertTrue(job.getNumReduceTasks() <= 200);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(1000)
            .maxReducers(2000)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 1000);
    assertTrue(job.getNumReduceTasks() <= 2000);
}
Also used : Path(org.apache.hadoop.fs.Path) Context(uk.gov.gchq.gaffer.store.Context) AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) User(uk.gov.gchq.gaffer.user.User) FileWriter(java.io.FileWriter) SplitStoreFromFile(uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile) BufferedWriter(java.io.BufferedWriter) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.jupiter.api.Test) AbstractJobFactoryTest(uk.gov.gchq.gaffer.hdfs.operation.hander.job.factory.AbstractJobFactoryTest)
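
The assertions hold because the factory picks a reducer count from the split points and then clamps it into the requested range. An illustrative sketch of that clamping, not Gaffer's actual code (numSplitPoints is a hypothetical variable):

    // Derive a candidate reducer count, then clamp it into [minReducers, maxReducers]
    int numReducers = numSplitPoints + 1;
    if (numReducers < minReducers) {
        numReducers = minReducers;
    }
    if (numReducers > maxReducers) {
        numReducers = maxReducers;
    }
    job.setNumReduceTasks(numReducers);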

Aggregations

AddElementsFromHdfs (uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) 12
BufferedWriter (java.io.BufferedWriter) 7
FileSystem (org.apache.hadoop.fs.FileSystem) 7
Path (org.apache.hadoop.fs.Path) 7
JobConf (org.apache.hadoop.mapred.JobConf) 7
Job (org.apache.hadoop.mapreduce.Job) 7
Test (org.junit.jupiter.api.Test) 6
User (uk.gov.gchq.gaffer.user.User) 6
AbstractJobFactoryTest (uk.gov.gchq.gaffer.hdfs.operation.hander.job.factory.AbstractJobFactoryTest) 5
Context (uk.gov.gchq.gaffer.store.Context) 5
FileWriter (java.io.FileWriter) 4
TextJobInitialiser (uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser) 4
SplitStoreFromFile (uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile) 4
SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings) 3
OutputStreamWriter (java.io.OutputStreamWriter) 3
AccumuloStore (uk.gov.gchq.gaffer.accumulostore.AccumuloStore) 3
SingleUseMiniAccumuloStore (uk.gov.gchq.gaffer.accumulostore.SingleUseMiniAccumuloStore) 2
GafferRangePartitioner (uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.partitioner.GafferRangePartitioner) 2
Graph (uk.gov.gchq.gaffer.graph.Graph) 2
HashMap (java.util.HashMap) 1