Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.
From the class AddElementsFromHdfsIT, method addElementsFromHdfs:
private void addElementsFromHdfs(Class<? extends AccumuloKeyPackage> keyPackageClass) throws Exception {
    // Given
    createInputFile();
    final Graph graph = createGraph(keyPackageClass);

    // When
    graph.execute(new AddElementsFromHdfs.Builder()
            .inputPaths(Collections.singletonList(inputDir))
            .outputPath(outputDir)
            .failurePath(failureDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .jobInitialiser(new TextJobInitialiser())
            .option(AccumuloStoreConstants.OPERATION_HDFS_USE_PROVIDED_SPLITS_FILE, "false")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, splitsFile)
            .build(), new User());

    // Then
    final CloseableIterable<Element> elements = graph.execute(new GetAllElements<>(), new User());
    final List<Element> elementList = Lists.newArrayList(elements);
    assertEquals(NUM_ENTITIES, elementList.size());
    for (int i = 0; i < NUM_ENTITIES; i++) {
        assertEquals(TestGroups.ENTITY, elementList.get(i).getGroup());
        assertEquals(VERTEX_ID_PREFIX + i, ((Entity) elementList.get(i)).getVertex());
    }
}
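The tests above rely on a TextMapperGeneratorImpl that is not shown in this listing. A minimal sketch of what such a mapper generator could look like, assuming a one-entity-per-line text input and the Gaffer 1.x TextMapperGenerator and OneToOneElementGenerator APIs (the package paths, helper class name and input format here are illustrative assumptions, not copied from the test):

import uk.gov.gchq.gaffer.data.element.Element;
import uk.gov.gchq.gaffer.data.element.Entity;
import uk.gov.gchq.gaffer.data.generator.OneToOneElementGenerator;
import uk.gov.gchq.gaffer.hdfs.operation.mapper.generator.TextMapperGenerator;

// Illustrative mapper generator: converts each input line of the form
// "group,vertex" into a single Entity. The real TextMapperGeneratorImpl used
// by the tests may differ.
public static final class TextMapperGeneratorImpl extends TextMapperGenerator {
    public TextMapperGeneratorImpl() {
        super(new ExampleGenerator());
    }
}

public static final class ExampleGenerator implements OneToOneElementGenerator<String> {
    @Override
    public Element _apply(final String line) {
        final String[] parts = line.split(",");
        return new Entity(parts[0], parts[1]);
    }
}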
Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project gaffer-doc by gchq.
From the class AddElementsFromHdfsExample, method addElementsFromHdfsMainMethod:
@SuppressFBWarnings("REC_CATCH_EXCEPTION")
private void addElementsFromHdfsMainMethod() {
try {
// ---------------------------------------------------------
if (5 != args.length) {
System.err.println("Usage: hadoop jar custom-hdfs-import-<version>-shaded.jar <inputPath> <outputPath> <failurePath> <schemaPath> <storePropertiesPath>");
System.exit(1);
}
final String inputPath = args[0];
final String outputPath = args[1];
final String failurePath = args[2];
final String schemaPath = args[3];
final String storePropertiesPath = args[4];
final Graph graph = new Graph.Builder().storeProperties(storePropertiesPath).addSchemas(Paths.get(schemaPath)).build();
final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder().addInputMapperPair(inputPath, TextMapperGeneratorImpl.class.getName()).outputPath(outputPath).failurePath(failurePath).splitsFilePath("/tmp/splits").workingPath("/tmp/workingDir").useProvidedSplits(false).jobInitialiser(new TextJobInitialiser()).minReducers(10).maxReducers(100).build();
graph.execute(operation, new User());
// ---------------------------------------------------------
} catch (final Exception e) {
// ignore error
}
showJavaExample("Example content for a main method that takes 5 arguments and runs an " + AddElementsFromHdfs.class.getSimpleName());
}
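In the snippet above, args is presumably a field of the documentation example class, which is why a private method can read it without a parameter; showJavaExample only records the snippet for the generated docs. In a real custom-hdfs-import jar the same logic would sit inside a standard entry point, for example (class-less sketch, names illustrative):

// Illustrative entry point for a shaded custom-hdfs-import jar. The body is
// the argument handling, Graph construction and AddElementsFromHdfs execution
// shown in the snippet above.
public static void main(final String[] args) throws Exception {
    if (5 != args.length) {
        System.err.println("Usage: hadoop jar custom-hdfs-import-<version>-shaded.jar "
                + "<inputPath> <outputPath> <failurePath> <schemaPath> <storePropertiesPath>");
        System.exit(1);
    }
    // ... build the Graph and the AddElementsFromHdfs operation as above, then:
    // graph.execute(operation, new User());
}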
Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project gaffer-doc by gchq.
From the class AddElementsFromHdfsExample, method addElementsFromHdfsWithMultipleInput:
@SuppressFBWarnings("DLS_DEAD_LOCAL_STORE")
public void addElementsFromHdfsWithMultipleInput() {
// ---------------------------------------------------------
final Map<String, String> inputMapperMap = new HashMap<>();
inputMapperMap.put("/path/to/first/inputFileOrFolder", TextMapperGeneratorImpl.class.getName());
inputMapperMap.put("/path/to/second/inputFileOrFolder", TextMapperGeneratorImpl.class.getName());
final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder().inputMapperPairs(inputMapperMap).addInputMapperPair("/path/to/third/inputFileOrFolder", TextMapperGeneratorImpl.class.getName()).outputPath("/path/to/output/folder").failurePath("/path/to/failure/folder").splitsFilePath("/path/to/splits/file").workingPath("/tmp/workingDir").useProvidedSplits(false).jobInitialiser(new TextJobInitialiser()).minReducers(10).maxReducers(100).build();
// ---------------------------------------------------------
showJavaExample(null);
}
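The operation built above is never executed in this example (hence the DLS_DEAD_LOCAL_STORE suppression). To run it, it would be submitted to a Graph in the same way as the main-method example, for instance:

// 'graph' is assumed to be an existing Graph built from a schema and store
// properties, as in the main-method example above.
graph.execute(operation, new User());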
Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.
From the class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldThrowExceptionWhenMaxReducersSetOutsideOfRange:
@Test
public void shouldThrowExceptionWhenMaxReducersSetOutsideOfRange() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));

    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();

    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder()
            .inputPath(splitsFile)
            .build();
    store.execute(splitTable, new Context(new User()));

    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);

    // When
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(100)
            .maxReducers(101)
            .splitsFilePath("target/data/splits.txt")
            .build();

    // Then
    try {
        factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
        fail("Exception expected");
    } catch (final IllegalArgumentException e) {
        assertTrue(e.getMessage().contains("not a valid range"));
    }
}
Use of uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs in project Gaffer by gchq.
From the class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetNumberOfReducersBetweenMinAndMaxSpecified:
@Test
public void shouldSetNumberOfReducersBetweenMinAndMaxSpecified() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));

    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();

    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder()
            .inputPath(splitsFile)
            .build();
    store.execute(splitTable, new Context(new User()));

    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);

    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(10)
            .maxReducers(20)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 10);
    assertTrue(job.getNumReduceTasks() <= 20);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(100)
            .maxReducers(200)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 100);
    assertTrue(job.getNumReduceTasks() <= 200);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(1000)
            .maxReducers(2000)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 1000);
    assertTrue(job.getNumReduceTasks() <= 2000);
}