Search in sources:

Example 11 with SplitStoreFromFile

Use of uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile in the project Gaffer by gchq.

From the class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetNumberOfReducersBetweenMinAndMaxSpecified.

@Test
public void shouldSetNumberOfReducersBetweenMinAndMaxSpecified() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
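    // Write split points 100..199 to the splits file, one per line.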
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();
    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder().inputPath(splitsFile).build();
    store.execute(splitTable, new Context(new User()));
    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);
    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(10)
            .maxReducers(20)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 10);
    assertTrue(job.getNumReduceTasks() <= 20);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(100)
            .maxReducers(200)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 100);
    assertTrue(job.getNumReduceTasks() <= 200);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(1000)
            .maxReducers(2000)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 1000);
    assertTrue(job.getNumReduceTasks() <= 2000);
}
Also used: Path (org.apache.hadoop.fs.Path), Context (uk.gov.gchq.gaffer.store.Context), AddElementsFromHdfs (uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs), User (uk.gov.gchq.gaffer.user.User), FileWriter (java.io.FileWriter), SplitStoreFromFile (uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile), BufferedWriter (java.io.BufferedWriter), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job), JobConf (org.apache.hadoop.mapred.JobConf), Test (org.junit.jupiter.api.Test), AbstractJobFactoryTest (uk.gov.gchq.gaffer.hdfs.operation.handler.job.factory.AbstractJobFactoryTest)
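
For running SplitStoreFromFile outside a test harness, a minimal sketch against the public Graph API follows. The graph id matches the test above, but the schema folder and store properties path are placeholders, not values taken from the test:

import uk.gov.gchq.gaffer.commonutil.StreamUtil;
import uk.gov.gchq.gaffer.graph.Graph;
import uk.gov.gchq.gaffer.graph.GraphConfig;
import uk.gov.gchq.gaffer.operation.OperationException;
import uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile;
import uk.gov.gchq.gaffer.user.User;

public class SplitStoreFromFileExample {
    public static void main(final String[] args) throws OperationException {
        // Placeholder graph setup: the schema and properties paths are
        // illustrative, not taken from the test above.
        final Graph graph = new Graph.Builder()
                .config(new GraphConfig.Builder().graphId("graphId").build())
                .addSchemas(StreamUtil.openStreams(SplitStoreFromFileExample.class, "/schema"))
                .storeProperties("/accumulo.properties")
                .build();

        // One serialised split point per line, as in the tests' splits file.
        final SplitStoreFromFile splitStore = new SplitStoreFromFile.Builder()
                .inputPath("target/data/splits.txt")
                .build();

        graph.execute(splitStore, new User());
    }
}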

Example 12 with SplitStoreFromFile

Use of uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile in the project Gaffer by gchq.

From the class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetNoMoreThanMaxNumberOfReducersSpecified.

@Test
public void shouldSetNoMoreThanMaxNumberOfReducersSpecified() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile.toString()));
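    // Write split points 100..199 to the splits file, one per line.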
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();
    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder().inputPath(splitsFile.toString()).build();
    store.execute(splitTable, new Context(new User()));
    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);
    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir.toString())
            .addInputMapperPair(inputDir.toString(), TextMapperGeneratorImpl.class.getName())
            .maxReducers(10)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() <= 10);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir.toString())
            .addInputMapperPair(inputDir.toString(), TextMapperGeneratorImpl.class.getName())
            .maxReducers(100)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() <= 100);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir.toString())
            .addInputMapperPair(inputDir.toString(), TextMapperGeneratorImpl.class.getName())
            .maxReducers(1000)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() <= 1000);
}
Also used: Path (org.apache.hadoop.fs.Path), Context (uk.gov.gchq.gaffer.store.Context), AddElementsFromHdfs (uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs), User (uk.gov.gchq.gaffer.user.User), FileWriter (java.io.FileWriter), SplitStoreFromFile (uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile), BufferedWriter (java.io.BufferedWriter), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job), JobConf (org.apache.hadoop.mapred.JobConf), Test (org.junit.jupiter.api.Test), AbstractJobFactoryTest (uk.gov.gchq.gaffer.hdfs.operation.handler.job.factory.AbstractJobFactoryTest)
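
Both tests build the splits file with an explicit writer.close(). Outside a test harness, try-with-resources is the more idiomatic way to guarantee the writer is closed even if a write throws. A minimal sketch mirroring the tests' 100 split points (the class name SplitsFileWriter is illustrative):

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

public final class SplitsFileWriter {
    // Write split points 100..199, one per line, as both tests above do.
    public static void writeSplits(final String splitsFile) throws IOException {
        try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(splitsFile))) {
            for (int i = 100; i < 200; i++) {
                writer.write(i + "\n");
            }
        }
    }
}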

Aggregations

Test (org.junit.jupiter.api.Test): 12
SplitStoreFromFile (uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile): 12
Context (uk.gov.gchq.gaffer.store.Context): 12
User (uk.gov.gchq.gaffer.user.User): 12
Operation (uk.gov.gchq.gaffer.operation.Operation): 8
OperationChain (uk.gov.gchq.gaffer.operation.OperationChain): 8
GetElements (uk.gov.gchq.gaffer.operation.impl.get.GetElements): 8
DiscardOutput (uk.gov.gchq.gaffer.operation.impl.DiscardOutput): 7
Count (uk.gov.gchq.gaffer.operation.impl.Count): 6
GetAdjacentIds (uk.gov.gchq.gaffer.operation.impl.get.GetAdjacentIds): 6
GetAllElements (uk.gov.gchq.gaffer.operation.impl.get.GetAllElements): 6
CountGroups (uk.gov.gchq.gaffer.operation.impl.CountGroups): 5
Validate (uk.gov.gchq.gaffer.operation.impl.Validate): 5
BufferedWriter (java.io.BufferedWriter): 4
FileWriter (java.io.FileWriter): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 4
Path (org.apache.hadoop.fs.Path): 4
JobConf (org.apache.hadoop.mapred.JobConf): 4
Job (org.apache.hadoop.mapreduce.Job): 4
AddElementsFromHdfs (uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs): 4
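
The aggregation counts above show that SplitStoreFromFile most often appears alongside OperationChain, GetElements, and DiscardOutput in the remaining examples. A minimal sketch of that common shape, splitting the store and then querying it in a single chain, follows; the splits path matches the tests above, while the seed value "vertex1" is a placeholder:

import uk.gov.gchq.gaffer.operation.OperationChain;
import uk.gov.gchq.gaffer.operation.data.EntitySeed;
import uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile;
import uk.gov.gchq.gaffer.operation.impl.get.GetElements;

// Split the store from a file of split points, then run a query in the same chain.
final OperationChain<?> chain = new OperationChain.Builder()
        .first(new SplitStoreFromFile.Builder()
                .inputPath("target/data/splits.txt")
                .build())
        .then(new GetElements.Builder()
                .input(new EntitySeed("vertex1"))
                .build())
        .build();

// Execute against a configured Graph, e.g. graph.execute(chain, new User());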