
Example 1 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.

From the class AccumuloAddElementsFromHdfsJobFactoryTest, the method shouldSetupAccumuloPartitionerWhenSetupJobForGivenPartitionerFlag:

private void shouldSetupAccumuloPartitionerWhenSetupJobForGivenPartitionerFlag(final String partitionerFlag) throws IOException {
    // Given
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    try (final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(splitsFile), true)))) {
        writer.write("1");
    }
    final AccumuloAddElementsFromHdfsJobFactory factory = new AccumuloAddElementsFromHdfsJobFactory();
    final Job job = mock(Job.class);
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .option(AccumuloStoreConstants.OPERATION_HDFS_USE_ACCUMULO_PARTITIONER, partitionerFlag)
            .option(AccumuloStoreConstants.OPERATION_HDFS_USE_PROVIDED_SPLITS_FILE, "true")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, splitsFile)
            .build();
    final AccumuloStore store = mock(AccumuloStore.class);
    final AccumuloProperties properties = mock(AccumuloProperties.class);
    given(job.getConfiguration()).willReturn(localConf);
    // When
    factory.setupJob(job, operation, store);
    // Then
    if ("false".equals(partitionerFlag)) {
        verify(job, never()).setNumReduceTasks(Mockito.anyInt());
        verify(job, never()).setPartitionerClass(Mockito.any(Class.class));
        assertNull(job.getConfiguration().get(RangePartitioner.class.getName() + ".cutFile"));
    } else {
        verify(job).setNumReduceTasks(2);
        verify(job).setPartitionerClass(KeyRangePartitioner.class);
        assertEquals(splitsFile, job.getConfiguration().get(RangePartitioner.class.getName() + ".cutFile"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) AccumuloProperties(uk.gov.gchq.gaffer.accumulostore.AccumuloProperties) BufferedWriter(java.io.BufferedWriter) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStreamWriter(java.io.OutputStreamWriter) RangePartitioner(org.apache.accumulo.core.client.mapreduce.lib.partition.RangePartitioner) KeyRangePartitioner(org.apache.accumulo.core.client.mapreduce.lib.partition.KeyRangePartitioner) AccumuloStore(uk.gov.gchq.gaffer.accumulostore.AccumuloStore) MockAccumuloStore(uk.gov.gchq.gaffer.accumulostore.MockAccumuloStore) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf)
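
The helper above is private and parameterised by a string flag rather than annotated with @Test. A minimal sketch of how callers might exercise both branches (these two @Test methods are assumptions for illustration; they do not appear in the source):

@Test
public void shouldSetupAccumuloPartitionerWhenFlagIsTrue() throws IOException {
    // Hypothetical caller: drives the verify(job).setPartitionerClass(...) branch.
    shouldSetupAccumuloPartitionerWhenSetupJobForGivenPartitionerFlag("true");
}

@Test
public void shouldNotSetupAccumuloPartitionerWhenFlagIsFalse() throws IOException {
    // Hypothetical caller: drives the verify(job, never()) branch.
    shouldSetupAccumuloPartitionerWhenSetupJobForGivenPartitionerFlag("false");
}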

Example 2 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project gaffer-doc by gchq.

From the class AddElementsFromHdfsExample, the method addElementsFromHdfs:

@SuppressFBWarnings("DLS_DEAD_LOCAL_STORE")
public void addElementsFromHdfs() {
    // ---------------------------------------------------------
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .addInputMapperPair("/path/to/input/fileOrFolder", TextMapperGeneratorImpl.class.getName())
            .outputPath("/path/to/output/folder")
            .failurePath("/path/to/failure/folder")
            .splitsFilePath("/path/to/splits/file")
            .workingPath("/tmp/workingDir")
            .useProvidedSplits(false)
            .jobInitialiser(new TextJobInitialiser())
            .minReducers(10)
            .maxReducers(100)
            .build();
    // ---------------------------------------------------------
    showJavaExample(null);
}
Also used : AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) TextJobInitialiser(uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)
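
Example 2 assumes a TextMapperGeneratorImpl on the classpath. A minimal sketch of such a generator, assuming a "group,vertex" line format (the ExampleGenerator name and the CSV layout are illustrative assumptions; the sketch relies on Gaffer's TextMapperGenerator, OneToOneElementGenerator, Element and Entity types):

// Illustrative only: converts each text line read from HDFS into a Gaffer Entity.
public static final class TextMapperGeneratorImpl extends TextMapperGenerator {
    public TextMapperGeneratorImpl() {
        super(new ExampleGenerator());
    }
}

public static final class ExampleGenerator implements OneToOneElementGenerator<String> {
    @Override
    public Element _apply(final String line) {
        // Assumed input format: "group,vertex"
        final String[] parts = line.split(",");
        return new Entity(parts[0], parts[1]);
    }
}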

Example 3 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.

From the class FederatedAddGraphHandlerTest, the method shouldAddGraphAndAddSupportedOperations:

@Test
public void shouldAddGraphAndAddSupportedOperations() throws Exception {
    store.initialise(FEDERATEDSTORE_GRAPH_ID, null, federatedStoreProperties);
    Schema expectedSchema = new Schema.Builder().build();
    assertFalse(store.isSupported(GetElementsInRanges.class), "Empty FederatedStore should NOT support GetElementsInRanges");
    assertFalse(store.isSupported(AddElementsFromHdfs.class), "Empty FederatedStore should NOT support AddElementsFromHdfs");
    FederatedAddGraphHandler federatedAddGraphHandler = new FederatedAddGraphHandler();
    federatedAddGraphHandler.doOperation(
            new AddGraph.Builder()
                    .graphId(EXPECTED_GRAPH_ID)
                    .schema(expectedSchema)
                    .storeProperties(PROPERTIES)
                    .build(),
            new Context(testUser),
            store);
    assertTrue(store.isSupported(GetElementsInRanges.class), "FederatedStore with an added Accumulo store should support GetElementsInRanges");
    assertTrue(store.isSupported(AddElementsFromHdfs.class), "FederatedStore with an added Accumulo store should support AddElementsFromHdfs");
}
Also used : AddGraph(uk.gov.gchq.gaffer.federatedstore.operation.AddGraph) Context(uk.gov.gchq.gaffer.store.Context) AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) Schema(uk.gov.gchq.gaffer.store.schema.Schema) GetElementsInRanges(uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsInRanges) Test(org.junit.jupiter.api.Test)
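
A hedged follow-on, not part of the test: once the Accumulo-backed graph has been added, the federated store can execute the newly supported operation like any other (TextMapperGeneratorImpl and the paths are illustrative assumptions):

final AddElementsFromHdfs addFromHdfs = new AddElementsFromHdfs.Builder()
        .addInputMapperPair("/path/to/input", TextMapperGeneratorImpl.class.getName())
        .outputPath("/path/to/output")
        .failurePath("/path/to/failure")
        .jobInitialiser(new TextJobInitialiser())
        .build();
// Delegates to the added Accumulo store's AddElementsFromHdfs handler.
store.execute(addFromHdfs, new Context(testUser));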

Example 4 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.

From the class AccumuloAddElementsFromHdfsJobFactoryTest, the method shouldSetupJob:

@Test
public void shouldSetupJob() throws IOException {
    // Given
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    try (final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(splitsFile), true)))) {
        writer.write("1");
    }
    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = mock(Job.class);
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir.toString())
            .addInputMapperPair(inputDir.toString(), TextMapperGeneratorImpl.class.getName())
            .useProvidedSplits(true)
            .splitsFilePath(splitsFile.toString())
            .build();
    final AccumuloStore store = mock(AccumuloStore.class);
    given(job.getConfiguration()).willReturn(localConf);
    // When
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    verify(job).setJarByClass(factory.getClass());
    verify(job).setJobName(String.format(AccumuloAddElementsFromHdfsJobFactory.INGEST_HDFS_DATA_GENERATOR_S_OUTPUT_S, TextMapperGeneratorImpl.class.getName(), outputDir));
    verify(job).setMapperClass(AddElementsFromHdfsMapper.class);
    verify(job).setMapOutputKeyClass(Key.class);
    verify(job).setMapOutputValueClass(Value.class);
    verify(job).setCombinerClass(AccumuloKeyValueReducer.class);
    verify(job).setReducerClass(AccumuloKeyValueReducer.class);
    verify(job).setOutputKeyClass(Key.class);
    verify(job).setOutputValueClass(Value.class);
    verify(job).setOutputFormatClass(AccumuloFileOutputFormat.class);
    assertEquals(fs.makeQualified(new Path(outputDir)).toString(), job.getConfiguration().get("mapreduce.output.fileoutputformat.outputdir"));
    verify(job).setNumReduceTasks(2);
    verify(job).setPartitionerClass(GafferKeyRangePartitioner.class);
    assertEquals(splitsFile, job.getConfiguration().get(GafferRangePartitioner.class.getName() + ".cutFile"));
}
Also used : Path(org.apache.hadoop.fs.Path) AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStreamWriter(java.io.OutputStreamWriter) SingleUseMiniAccumuloStore(uk.gov.gchq.gaffer.accumulostore.SingleUseMiniAccumuloStore) AccumuloStore(uk.gov.gchq.gaffer.accumulostore.AccumuloStore) Job(org.apache.hadoop.mapreduce.Job) GafferRangePartitioner(uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.partitioner.GafferRangePartitioner) JobConf(org.apache.hadoop.mapred.JobConf) BufferedWriter(java.io.BufferedWriter) Test(org.junit.jupiter.api.Test) AbstractJobFactoryTest(uk.gov.gchq.gaffer.hdfs.operation.hander.job.factory.AbstractJobFactoryTest)

Example 5 with AddElementsFromHdfs

Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.

From the class AccumuloAddElementsFromHdfsJobFactoryTest, the method shouldSetNoLessThanMinNumberOfReducersSpecified:

@Test
public void shouldSetNoLessThanMinNumberOfReducersSpecified() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    try (final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile))) {
        for (int i = 100; i < 200; i++) {
            writer.write(i + "\n");
        }
    }
    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder().inputPath(splitsFile).build();
    store.execute(splitTable, new Context(new User()));
    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);
    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(10)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 10);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(100)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 100);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(1000)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 1000);
}
Also used : Path(org.apache.hadoop.fs.Path) Context(uk.gov.gchq.gaffer.store.Context) AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) User(uk.gov.gchq.gaffer.user.User) FileWriter(java.io.FileWriter) SplitStoreFromFile(uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile) BufferedWriter(java.io.BufferedWriter) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.jupiter.api.Test) AbstractJobFactoryTest(uk.gov.gchq.gaffer.hdfs.operation.hander.job.factory.AbstractJobFactoryTest)
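
The builder also exposes maxReducers (see Example 2). A sketch of the complementary upper-bound check, modelled on the pattern above (the values and the assertion are assumptions, not taken from the source):

// When
operation = new AddElementsFromHdfs.Builder()
        .outputPath(outputDir)
        .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
        .maxReducers(10)
        .splitsFilePath("target/data/splits.txt")
        .build();
factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
// Then
assertTrue(job.getNumReduceTasks() <= 10);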

Aggregations

AddElementsFromHdfs (uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs): 12 usages
BufferedWriter (java.io.BufferedWriter): 7 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 7 usages
Path (org.apache.hadoop.fs.Path): 7 usages
JobConf (org.apache.hadoop.mapred.JobConf): 7 usages
Job (org.apache.hadoop.mapreduce.Job): 7 usages
Test (org.junit.jupiter.api.Test): 6 usages
User (uk.gov.gchq.gaffer.user.User): 6 usages
AbstractJobFactoryTest (uk.gov.gchq.gaffer.hdfs.operation.hander.job.factory.AbstractJobFactoryTest): 5 usages
Context (uk.gov.gchq.gaffer.store.Context): 5 usages
FileWriter (java.io.FileWriter): 4 usages
TextJobInitialiser (uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser): 4 usages
SplitStoreFromFile (uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile): 4 usages
SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings): 3 usages
OutputStreamWriter (java.io.OutputStreamWriter): 3 usages
AccumuloStore (uk.gov.gchq.gaffer.accumulostore.AccumuloStore): 3 usages
SingleUseMiniAccumuloStore (uk.gov.gchq.gaffer.accumulostore.SingleUseMiniAccumuloStore): 2 usages
GafferRangePartitioner (uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.partitioner.GafferRangePartitioner): 2 usages
Graph (uk.gov.gchq.gaffer.graph.Graph): 2 usages
HashMap (java.util.HashMap): 1 usage