Use of uk.gov.gchq.gaffer.accumulostore.operation.hdfs.operation.SplitTable in project Gaffer by gchq.
From the class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetNoLessThanMinNumberOfReducersSpecified.
@Test
public void shouldSetNoLessThanMinNumberOfReducersSpecified() throws IOException, StoreException, OperationException {
    // Given
    final MockAccumuloStore store = new MockAccumuloStore();
    final Schema schema = Schema.fromJson(StreamUtil.schemas(AccumuloAddElementsFromHdfsJobFactoryTest.class));
    final AccumuloProperties properties = AccumuloProperties.loadStoreProperties(StreamUtil.storeProps(AccumuloAddElementsFromHdfsJobFactoryTest.class));
    store.initialise(schema, properties);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();
    final SplitTable splitTable = new SplitTable.Builder()
            .inputPath(splitsFile)
            .build();
    store.execute(splitTable, new User());
    final AccumuloAddElementsFromHdfsJobFactory factory = new AccumuloAddElementsFromHdfsJobFactory();
    final Job job = Job.getInstance(localConf);

    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MIN_REDUCERS, "10")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, "target/data/splits.txt")
            .build();
    factory.setupJobConf(localConf, operation, store);
    factory.setupJob(job, operation, store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 10);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MIN_REDUCERS, "100")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, "target/data/splits.txt")
            .build();
    factory.setupJobConf(localConf, operation, store);
    factory.setupJob(job, operation, store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 100);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MIN_REDUCERS, "1000")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, "target/data/splits.txt")
            .build();
    factory.setupJobConf(localConf, operation, store);
    factory.setupJob(job, operation, store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 1000);
}
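An aside on the setup above: the splits file is written with an explicitly closed BufferedWriter. A behaviour-equivalent sketch of the same loop using try-with-resources would close the writer even if a write throws:

// Behaviour-equivalent to the writer loop in the test above; the writer is
// closed automatically when the block exits, including on exception.
try (final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile))) {
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
}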
Use of uk.gov.gchq.gaffer.accumulostore.operation.hdfs.operation.SplitTable in project Gaffer by gchq.
From the class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetNoMoreThanMaxNumberOfReducersSpecified.
@Test
public void shouldSetNoMoreThanMaxNumberOfReducersSpecified() throws IOException, StoreException, OperationException {
    // Given
    final MockAccumuloStore store = new MockAccumuloStore();
    final Schema schema = Schema.fromJson(StreamUtil.schemas(AccumuloAddElementsFromHdfsJobFactoryTest.class));
    final AccumuloProperties properties = AccumuloProperties.loadStoreProperties(StreamUtil.storeProps(AccumuloAddElementsFromHdfsJobFactoryTest.class));
    store.initialise(schema, properties);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();
    final SplitTable splitTable = new SplitTable.Builder()
            .inputPath(splitsFile)
            .build();
    store.execute(splitTable, new User());
    final AccumuloAddElementsFromHdfsJobFactory factory = new AccumuloAddElementsFromHdfsJobFactory();
    final Job job = Job.getInstance(localConf);

    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MAX_REDUCERS, "10")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, "target/data/splits.txt")
            .build();
    factory.setupJobConf(localConf, operation, store);
    factory.setupJob(job, operation, store);

    // Then
    assertTrue(job.getNumReduceTasks() <= 10);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MAX_REDUCERS, "100")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, "target/data/splits.txt")
            .build();
    factory.setupJobConf(localConf, operation, store);
    factory.setupJob(job, operation, store);

    // Then
    assertTrue(job.getNumReduceTasks() <= 100);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MAX_REDUCERS, "1000")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, "target/data/splits.txt")
            .build();
    factory.setupJobConf(localConf, operation, store);
    factory.setupJob(job, operation, store);

    // Then
    assertTrue(job.getNumReduceTasks() <= 1000);
}
Use of uk.gov.gchq.gaffer.accumulostore.operation.hdfs.operation.SplitTable in project Gaffer by gchq.
From the class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetNumberOfReducersBetweenMinAndMaxSpecified.
@Test
public void shouldSetNumberOfReducersBetweenMinAndMaxSpecified() throws IOException, StoreException, OperationException {
    // Given
    final MockAccumuloStore store = new MockAccumuloStore();
    final Schema schema = Schema.fromJson(StreamUtil.schemas(AccumuloAddElementsFromHdfsJobFactoryTest.class));
    final AccumuloProperties properties = AccumuloProperties.loadStoreProperties(StreamUtil.storeProps(AccumuloAddElementsFromHdfsJobFactoryTest.class));
    store.initialise(schema, properties);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();
    final SplitTable splitTable = new SplitTable.Builder()
            .inputPath(splitsFile)
            .build();
    store.execute(splitTable, new User());
    final AccumuloAddElementsFromHdfsJobFactory factory = new AccumuloAddElementsFromHdfsJobFactory();
    final Job job = Job.getInstance(localConf);

    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MIN_REDUCERS, "10")
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MAX_REDUCERS, "20")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, "target/data/splits.txt")
            .build();
    factory.setupJobConf(localConf, operation, store);
    factory.setupJob(job, operation, store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 10);
    assertTrue(job.getNumReduceTasks() <= 20);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MIN_REDUCERS, "100")
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MAX_REDUCERS, "200")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, "target/data/splits.txt")
            .build();
    factory.setupJobConf(localConf, operation, store);
    factory.setupJob(job, operation, store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 100);
    assertTrue(job.getNumReduceTasks() <= 200);

    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .mapperGenerator(TextMapperGeneratorImpl.class)
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MIN_REDUCERS, "1000")
            .option(AccumuloStoreConstants.OPERATION_BULK_IMPORT_MAX_REDUCERS, "2000")
            .option(AccumuloStoreConstants.OPERATION_HDFS_SPLITS_FILE_PATH, "target/data/splits.txt")
            .build();
    factory.setupJobConf(localConf, operation, store);
    factory.setupJob(job, operation, store);

    // Then
    assertTrue(job.getNumReduceTasks() >= 1000);
    assertTrue(job.getNumReduceTasks() <= 2000);
}
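Taken together, the three factory tests pin down a simple clamping rule: start from the number of split points seeded into the table and raise or lower the reducer count to honour any configured minimum or maximum. A rough sketch of that assumed behaviour (illustrative only, reverse-engineered from the assertions above, not the actual AccumuloAddElementsFromHdfsJobFactory source):

// Assumed clamping logic; the real factory may compute the starting count differently.
static int clampReducers(final int numSplitPoints, final Integer minReducers, final Integer maxReducers) {
    int numReducers = numSplitPoints + 1; // roughly one reducer per tablet
    if (minReducers != null && numReducers < minReducers) {
        numReducers = minReducers;
    }
    if (maxReducers != null && numReducers > maxReducers) {
        numReducers = maxReducers;
    }
    return numReducers;
}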
Use of uk.gov.gchq.gaffer.accumulostore.operation.hdfs.operation.SplitTable in project Gaffer by gchq.
From the class SplitTableTest, method builderShouldCreatePopulatedOperation.
@Test
@Override
public void builderShouldCreatePopulatedOperation() {
    final SplitTable splitTable = new SplitTable.Builder()
            .inputPath(INPUT_DIRECTORY)
            .option(TEST_OPTION_KEY, "true")
            .build();
    assertEquals(INPUT_DIRECTORY, splitTable.getInputPath());
    assertEquals("true", splitTable.getOption(TEST_OPTION_KEY));
}
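For comparison, the same operation could be assembled without the builder. setInputPath is exercised by the serialisation test below; the options setter shown here is an assumption mirroring the builder's option(...) method:

// Builder-free construction (sketch; setOptions is assumed, not taken from the tests).
final SplitTable splitTable = new SplitTable();
splitTable.setInputPath(INPUT_DIRECTORY);
splitTable.setOptions(Collections.singletonMap(TEST_OPTION_KEY, "true"));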
Use of uk.gov.gchq.gaffer.accumulostore.operation.hdfs.operation.SplitTable in project Gaffer by gchq.
From the class SplitTableTest, method shouldSerialiseAndDeserialiseOperation.
@Test
@Override
public void shouldSerialiseAndDeserialiseOperation() throws SerialisationException {
    // Given
    final SplitTable op = new SplitTable();
    op.setInputPath(INPUT_DIRECTORY);

    // When
    final byte[] json = serialiser.serialise(op, true);
    final SplitTable deserialisedOp = serialiser.deserialise(json, SplitTable.class);

    // Then
    assertEquals(INPUT_DIRECTORY, deserialisedOp.getInputPath());
}
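Pulling the pieces together, a minimal end-to-end use of SplitTable mirrors the setup in the factory tests above: write one split point per line to a file, then execute the operation against an initialised store (store and splitsFile as in those tests):

// Split the Accumulo table at the points listed in the splits file.
final SplitTable splitTable = new SplitTable.Builder()
        .inputPath(splitsFile)
        .build();
store.execute(splitTable, new User());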