Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.
The class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetupAccumuloPartitionerWhenSetupJobForGivenPartitionerFlag:
private void shouldSetupAccumuloPartitionerWhenSetupJobForGivenPartitionerFlag(final String partitionerFlag) throws IOException {
    // Given
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    try (final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(splitsFile), true)))) {
        writer.write("1");
    }

    final AccumuloAddElementsFromHdfsJobFactory factory = new AccumuloAddElementsFromHdfsJobFactory();
    final Job job = mock(Job.class);
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .option(AccumuloStoreConstants.OPERATION_HDFS_USE_ACCUMULO_PARTITIONER, partitionerFlag)
            .option(AccumuloStoreConstants.OPERATION_HDFS_USE_PROVIDED_SPLITS_FILE, "true")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, splitsFile)
            .build();
    final AccumuloStore store = mock(AccumuloStore.class);
    final AccumuloProperties properties = mock(AccumuloProperties.class);
    given(job.getConfiguration()).willReturn(localConf);

    // When
    factory.setupJob(job, operation, store);

    // Then
    if ("false".equals(partitionerFlag)) {
        verify(job, never()).setNumReduceTasks(Mockito.anyInt());
        verify(job, never()).setPartitionerClass(Mockito.any(Class.class));
        assertNull(job.getConfiguration().get(RangePartitioner.class.getName() + ".cutFile"));
    } else {
        verify(job).setNumReduceTasks(2);
        verify(job).setPartitionerClass(KeyRangePartitioner.class);
        assertEquals(splitsFile, job.getConfiguration().get(RangePartitioner.class.getName() + ".cutFile"));
    }
}
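The helper above is private and parameterised by partitionerFlag. A minimal sketch of public test methods that could drive it for both flag values (the method names below are hypothetical, not taken from the original test class):

@Test
public void shouldSetupAccumuloPartitionerWhenPartitionerFlagIsTrue() throws IOException {
    // Hypothetical driver: exercises the branch that configures the Accumulo partitioner.
    shouldSetupAccumuloPartitionerWhenSetupJobForGivenPartitionerFlag("true");
}

@Test
public void shouldNotSetupAccumuloPartitionerWhenPartitionerFlagIsFalse() throws IOException {
    // Hypothetical driver: exercises the branch where no partitioner is configured.
    shouldSetupAccumuloPartitionerWhenSetupJobForGivenPartitionerFlag("false");
}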
Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project gaffer-doc by gchq.
The class AddElementsFromHdfsExample, method addElementsFromHdfs:
@SuppressFBWarnings("DLS_DEAD_LOCAL_STORE")
public void addElementsFromHdfs() {
    // ---------------------------------------------------------
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .addInputMapperPair("/path/to/input/fileOrFolder", TextMapperGeneratorImpl.class.getName())
            .outputPath("/path/to/output/folder")
            .failurePath("/path/to/failure/folder")
            .splitsFilePath("/path/to/splits/file")
            .workingPath("/tmp/workingDir")
            .useProvidedSplits(false)
            .jobInitialiser(new TextJobInitialiser())
            .minReducers(10)
            .maxReducers(100)
            .build();
    // ---------------------------------------------------------
    showJavaExample(null);
}
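For context, the operation built in this example would normally be submitted to a Graph for execution. A minimal sketch, assuming a Graph backed by an Accumulo store; the graph id, schema location, store properties path and user id below are illustrative placeholders, not part of the original example:

// Build a Graph against an Accumulo store (all paths and ids are placeholders).
final Graph graph = new Graph.Builder()
        .config(new GraphConfig.Builder().graphId("exampleGraph").build())
        .addSchemas(StreamUtil.openStreams(AddElementsFromHdfsExample.class, "/example/schema"))
        .storeProperties("/path/to/accumulo/store.properties")
        .build();

// Execute the AddElementsFromHdfs operation; this launches the MapReduce ingest job.
graph.execute(operation, new User("exampleUser"));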
Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.
The class FederatedAddGraphHandlerTest, method shouldAddGraphAndAddSupportedOperations:
@Test
public void shouldAddGraphAndAddSupportedOperations() throws Exception {
    store.initialise(FEDERATEDSTORE_GRAPH_ID, null, federatedStoreProperties);
    Schema expectedSchema = new Schema.Builder().build();

    assertFalse(store.isSupported(GetElementsInRanges.class), "Empty FederatedStore should NOT support GetElementsInRanges");
    assertFalse(store.isSupported(AddElementsFromHdfs.class), "Empty FederatedStore should NOT support AddElementsFromHdfs");

    FederatedAddGraphHandler federatedAddGraphHandler = new FederatedAddGraphHandler();
    federatedAddGraphHandler.doOperation(
            new AddGraph.Builder()
                    .graphId(EXPECTED_GRAPH_ID)
                    .schema(expectedSchema)
                    .storeProperties(PROPERTIES)
                    .build(),
            new Context(testUser),
            store);

    assertTrue(store.isSupported(GetElementsInRanges.class), "FederatedStore with an added Accumulo store should support GetElementsInRanges");
    assertTrue(store.isSupported(AddElementsFromHdfs.class), "FederatedStore with an added Accumulo store should support AddElementsFromHdfs");
}
Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.
The class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetupJob:
@Test
public void shouldSetupJob() throws IOException {
    // Given
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    try (final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(splitsFile), true)))) {
        writer.write("1");
    }

    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = mock(Job.class);
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir.toString())
            .addInputMapperPair(inputDir.toString(), TextMapperGeneratorImpl.class.getName())
            .useProvidedSplits(true)
            .splitsFilePath(splitsFile.toString())
            .build();
    final AccumuloStore store = mock(AccumuloStore.class);
    given(job.getConfiguration()).willReturn(localConf);

    // When
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    verify(job).setJarByClass(factory.getClass());
    verify(job).setJobName(String.format(AccumuloAddElementsFromHdfsJobFactory.INGEST_HDFS_DATA_GENERATOR_S_OUTPUT_S, TextMapperGeneratorImpl.class.getName(), outputDir));
    verify(job).setMapperClass(AddElementsFromHdfsMapper.class);
    verify(job).setMapOutputKeyClass(Key.class);
    verify(job).setMapOutputValueClass(Value.class);
    verify(job).setCombinerClass(AccumuloKeyValueReducer.class);
    verify(job).setReducerClass(AccumuloKeyValueReducer.class);
    verify(job).setOutputKeyClass(Key.class);
    verify(job).setOutputValueClass(Value.class);
    verify(job).setOutputFormatClass(AccumuloFileOutputFormat.class);
    assertEquals(fs.makeQualified(new Path(outputDir)).toString(), job.getConfiguration().get("mapreduce.output.fileoutputformat.outputdir"));
    verify(job).setNumReduceTasks(2);
    verify(job).setPartitionerClass(GafferKeyRangePartitioner.class);
    assertEquals(splitsFile, job.getConfiguration().get(GafferRangePartitioner.class.getName() + ".cutFile"));
}
Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.
The class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetNoLessThanMinNumberOfReducersSpecified:
@Test
public void shouldSetNoLessThanMinNumberOfReducersSpecified() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();

    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder()
            .inputPath(splitsFile)
            .build();
    store.execute(splitTable, new Context(new User()));
    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);

    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(10)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 10);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(100)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 100);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(1000)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 1000);
}
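The AddElementsFromHdfs builder also exposes a maxReducers setting (used in the gaffer-doc example above). A corresponding upper-bound check might look like the following sketch, reusing the operation, factory, job and store variables from the test above (hypothetical, not part of the snippets shown):

// Hypothetical sketch: cap the number of reducers via maxReducers and assert the ceiling.
operation = new AddElementsFromHdfs.Builder()
        .outputPath(outputDir)
        .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
        .maxReducers(10)
        .splitsFilePath("target/data/splits.txt")
        .build();
factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
assertTrue(job.getNumReduceTasks() <= 10);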