
Example 6 with SplitStoreFromFile

Use of uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile in project Gaffer by gchq.

From the class AddOperationsToChainTest, method shouldHandleNestedOperationChain:

@Test
public void shouldHandleNestedOperationChain() throws SerialisationException {
    // Given
    AddOperationsToChain hook = fromJson(ADD_OPERATIONS_TO_CHAIN_RESOURCE_PATH);
    Operation discardOutput = new DiscardOutput();
    Operation splitStore = new SplitStoreFromFile();
    Operation validate = new Validate();
    Operation getAdjacentIds = new GetAdjacentIds();
    Operation count = new Count<>();
    Operation countGroups = new CountGroups();
    Operation getElements = new GetElements();
    If ifOp = new If.Builder<>()
            .conditional(new Conditional(new Exists(), new GetElements()))
            .then(new GetElements())
            .otherwise(new GetAllElements())
            .build();
    Operation getAllElements = new GetAllElements();
    Operation limit = new Limit<>();
    final OperationChain opChain = new OperationChain.Builder()
            .first(getAdjacentIds)
            .then(new OperationChain.Builder()
                    .first(getElements)
                    .then(getAllElements)
                    .build())
            .then(ifOp)
            .build();
    // When
    hook.preExecute(opChain, new Context(new User()));
    // Then
    final OperationChain expectedOpChain = new OperationChain.Builder()
            .first(discardOutput)
            .then(splitStore)
            .then(validate)
            .then(getAdjacentIds)
            .then(count)
            .then(discardOutput)
            .then((Operation) new OperationChain.Builder()
                    .first(countGroups)
                    .then(getElements)
                    .then(getAllElements)
                    .then(limit)
                    .then(validate)
                    .build())
            .then(new If.Builder<>()
                    .conditional(new Conditional(new Exists(), new OperationChain<>(new CountGroups(), new GetElements())))
                    .then(new OperationChain<>(new CountGroups(), new GetElements()))
                    .otherwise(new OperationChain<>(new GetAllElements(), new Limit<>(), new Validate()))
                    .build())
            .then(new Count())
            .build();
    JsonAssert.assertEquals(JSONSerialiser.serialise(expectedOpChain), JSONSerialiser.serialise(opChain));
}
Also used : Context(uk.gov.gchq.gaffer.store.Context) GetAdjacentIds(uk.gov.gchq.gaffer.operation.impl.get.GetAdjacentIds) User(uk.gov.gchq.gaffer.user.User) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) Conditional(uk.gov.gchq.gaffer.operation.util.Conditional) Operation(uk.gov.gchq.gaffer.operation.Operation) Count(uk.gov.gchq.gaffer.operation.impl.Count) SplitStoreFromFile(uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile) CountGroups(uk.gov.gchq.gaffer.operation.impl.CountGroups) Validate(uk.gov.gchq.gaffer.operation.impl.Validate) Exists(uk.gov.gchq.koryphe.impl.predicate.Exists) OperationChain(uk.gov.gchq.gaffer.operation.OperationChain) GetAllElements(uk.gov.gchq.gaffer.operation.impl.get.GetAllElements) DiscardOutput(uk.gov.gchq.gaffer.operation.impl.DiscardOutput) Limit(uk.gov.gchq.gaffer.operation.impl.Limit) If(uk.gov.gchq.gaffer.operation.impl.If) Test(org.junit.jupiter.api.Test)
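
The configuration loaded from ADD_OPERATIONS_TO_CHAIN_RESOURCE_PATH is not reproduced on this page. For orientation only, the sketch below is a hypothetical, hand-written configuration that would produce the expected chain in the test above. The JSON field names (start, before, after, end), the per-operation layout, and the use of JSONSerialiser.deserialise in place of the fromJson test helper are all assumptions inferred from the expected chain, not taken from the actual resource file.

    // Hypothetical AddOperationsToChain configuration, inferred from the expected chain above.
    // Field names and structure are assumptions; the real resource file may differ.
    final String hookJson = "{"
            + "\"start\": ["
            + "  {\"class\": \"uk.gov.gchq.gaffer.operation.impl.DiscardOutput\"},"
            + "  {\"class\": \"uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile\"},"
            + "  {\"class\": \"uk.gov.gchq.gaffer.operation.impl.Validate\"}"
            + "],"
            + "\"after\": {"
            + "  \"uk.gov.gchq.gaffer.operation.impl.get.GetAdjacentIds\": ["
            + "    {\"class\": \"uk.gov.gchq.gaffer.operation.impl.Count\"},"
            + "    {\"class\": \"uk.gov.gchq.gaffer.operation.impl.DiscardOutput\"}"
            + "  ],"
            + "  \"uk.gov.gchq.gaffer.operation.impl.get.GetAllElements\": ["
            + "    {\"class\": \"uk.gov.gchq.gaffer.operation.impl.Limit\"},"
            + "    {\"class\": \"uk.gov.gchq.gaffer.operation.impl.Validate\"}"
            + "  ]"
            + "},"
            + "\"before\": {"
            + "  \"uk.gov.gchq.gaffer.operation.impl.get.GetElements\": ["
            + "    {\"class\": \"uk.gov.gchq.gaffer.operation.impl.CountGroups\"}"
            + "  ]"
            + "},"
            + "\"end\": [{\"class\": \"uk.gov.gchq.gaffer.operation.impl.Count\"}]"
            + "}";
    // Deserialising with JSONSerialiser (already used for serialise in these tests) is an
    // assumed stand-in for the fromJson helper.
    final AddOperationsToChain hook = JSONSerialiser.deserialise(
            hookJson.getBytes(java.nio.charset.StandardCharsets.UTF_8), AddOperationsToChain.class);

Read this only as a guide to how the expected chain relates to the hook's three insertion points: operations added at the start, before or after named operation classes, and at the end.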

Example 7 with SplitStoreFromFile

Use of uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile in project Gaffer by gchq.

From the class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldSetNoLessThanMinNumberOfReducersSpecified:

@Test
public void shouldSetNoLessThanMinNumberOfReducersSpecified() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();
    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder().inputPath(splitsFile).build();
    store.execute(splitTable, new Context(new User()));
    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);
    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(10)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 10);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(100)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 100);
    // When
    operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(1000)
            .splitsFilePath("target/data/splits.txt")
            .build();
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
    // Then
    assertTrue(job.getNumReduceTasks() >= 1000);
}
Also used : Path(org.apache.hadoop.fs.Path) Context(uk.gov.gchq.gaffer.store.Context) AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) User(uk.gov.gchq.gaffer.user.User) FileWriter(java.io.FileWriter) SplitStoreFromFile(uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile) BufferedWriter(java.io.BufferedWriter) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.jupiter.api.Test) AbstractJobFactoryTest(uk.gov.gchq.gaffer.hdfs.operation.hander.job.factory.AbstractJobFactoryTest)
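
As an aside, the splits file above is written with a raw BufferedWriter; the same setup can be expressed with try-with-resources, and the SplitStoreFromFile operation can be executed against a Graph rather than directly against the store. This is a minimal sketch assuming an already-initialised Graph instance named graph and the illustrative path target/data/splits.txt; neither comes from the test itself.

    // Minimal sketch: write split points with try-with-resources, then apply them via SplitStoreFromFile.
    // java.nio.file types are fully qualified to avoid clashing with the Hadoop Path import used above.
    // The graph variable is assumed to be an initialised uk.gov.gchq.gaffer.graph.Graph.
    final java.nio.file.Path splits = java.nio.file.Paths.get("target/data/splits.txt");
    java.nio.file.Files.createDirectories(splits.getParent());
    try (final BufferedWriter splitsWriter = java.nio.file.Files.newBufferedWriter(splits)) {
        for (int i = 100; i < 200; i++) {
            splitsWriter.write(i + "\n");
        }
    }
    graph.execute(new SplitStoreFromFile.Builder()
            .inputPath(splits.toString())
            .build(), new User());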

Example 8 with SplitStoreFromFile

Use of uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile in project Gaffer by gchq.

From the class AddOperationsToChainTest, method shouldFailQuietlyIfNestedOperationsCannotBeModified:

@Test
public void shouldFailQuietlyIfNestedOperationsCannotBeModified() throws SerialisationException {
    // Given
    AddOperationsToChain hook = fromJson(ADD_OPERATIONS_TO_CHAIN_RESOURCE_PATH);
    Operation discardOutput = new DiscardOutput();
    Operation splitStore = new SplitStoreFromFile();
    Operation validate = new Validate();
    Operation getAdjacentIds = new GetAdjacentIds();
    Operation count = new Count<>();
    Operation getElements = new GetElements();
    Operation getAllElements = new GetAllElements();
    TestUnmodifiableOperationsImpl nestedUnmodifiableOps = new TestUnmodifiableOperationsImpl(Arrays.asList(getAllElements, getElements));
    final OperationChain opChain = new OperationChain.Builder().first(getAdjacentIds).then(nestedUnmodifiableOps).build();
    // When
    hook.preExecute(opChain, new Context(new User()));
    // Then
    final OperationChain expectedOpChain = new OperationChain.Builder()
            .first(discardOutput)
            .then(splitStore)
            .then(validate)
            .then(getAdjacentIds)
            .then(count)
            .then(discardOutput)
            .then(nestedUnmodifiableOps)
            .then(count)
            .build();
    JsonAssert.assertEquals(JSONSerialiser.serialise(expectedOpChain), JSONSerialiser.serialise(opChain));
}
Also used : Context(uk.gov.gchq.gaffer.store.Context) GetAdjacentIds(uk.gov.gchq.gaffer.operation.impl.get.GetAdjacentIds) User(uk.gov.gchq.gaffer.user.User) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) Operation(uk.gov.gchq.gaffer.operation.Operation) Count(uk.gov.gchq.gaffer.operation.impl.Count) SplitStoreFromFile(uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile) Validate(uk.gov.gchq.gaffer.operation.impl.Validate) OperationChain(uk.gov.gchq.gaffer.operation.OperationChain) GetAllElements(uk.gov.gchq.gaffer.operation.impl.get.GetAllElements) DiscardOutput(uk.gov.gchq.gaffer.operation.impl.DiscardOutput) TestUnmodifiableOperationsImpl(uk.gov.gchq.gaffer.operation.TestUnmodifiableOperationsImpl) Test(org.junit.jupiter.api.Test)

Example 9 with SplitStoreFromFile

Use of uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile in project Gaffer by gchq.

From the class AddOperationsToChainTest, method shouldHandleIfOperationWithNoConditionalOrOtherwise:

@Test
public void shouldHandleIfOperationWithNoConditionalOrOtherwise() throws SerialisationException {
    // Given
    AddOperationsToChain hook = fromJson(ADD_OPERATIONS_TO_CHAIN_RESOURCE_PATH);
    Operation discardOutput = new DiscardOutput();
    Operation splitStore = new SplitStoreFromFile();
    If ifOp = new If.Builder<>().then(new GetElements()).build();
    final OperationChain opChain = new OperationChain.Builder().first(ifOp).build();
    // When
    hook.preExecute(opChain, new Context(new User()));
    // Then
    final OperationChain expectedOpChain = new OperationChain.Builder()
            .first(discardOutput)
            .then(splitStore)
            .then(new If.Builder<>()
                    .then(new OperationChain<>(new CountGroups(), new GetElements()))
                    .build())
            .then(new Count())
            .build();
    JsonAssert.assertEquals(JSONSerialiser.serialise(expectedOpChain), JSONSerialiser.serialise(opChain));
}
Also used : Context(uk.gov.gchq.gaffer.store.Context) User(uk.gov.gchq.gaffer.user.User) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) Operation(uk.gov.gchq.gaffer.operation.Operation) Count(uk.gov.gchq.gaffer.operation.impl.Count) SplitStoreFromFile(uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile) CountGroups(uk.gov.gchq.gaffer.operation.impl.CountGroups) OperationChain(uk.gov.gchq.gaffer.operation.OperationChain) DiscardOutput(uk.gov.gchq.gaffer.operation.impl.DiscardOutput) If(uk.gov.gchq.gaffer.operation.impl.If) Test(org.junit.jupiter.api.Test)

Example 10 with SplitStoreFromFile

Use of uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile in project Gaffer by gchq.

From the class AccumuloAddElementsFromHdfsJobFactoryTest, method shouldThrowExceptionWhenMaxReducersSetOutsideOfRange:

@Test
public void shouldThrowExceptionWhenMaxReducersSetOutsideOfRange() throws IOException, StoreException, OperationException {
    // Given
    store.initialise("graphId", SCHEMA, PROPERTIES);
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    final BufferedWriter writer = new BufferedWriter(new FileWriter(splitsFile));
    for (int i = 100; i < 200; i++) {
        writer.write(i + "\n");
    }
    writer.close();
    final SplitStoreFromFile splitTable = new SplitStoreFromFile.Builder().inputPath(splitsFile).build();
    store.execute(splitTable, new Context(new User()));
    final AccumuloAddElementsFromHdfsJobFactory factory = getJobFactory();
    final Job job = Job.getInstance(localConf);
    // When
    AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .minReducers(100)
            .maxReducers(101)
            .splitsFilePath("target/data/splits.txt")
            .build();
    // Then
    try {
        factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);
        fail("Exception expected");
    } catch (IllegalArgumentException e) {
        assertTrue(e.getMessage().contains("not a valid range"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Context(uk.gov.gchq.gaffer.store.Context) AddElementsFromHdfs(uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs) User(uk.gov.gchq.gaffer.user.User) FileWriter(java.io.FileWriter) SplitStoreFromFile(uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile) BufferedWriter(java.io.BufferedWriter) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.jupiter.api.Test) AbstractJobFactoryTest(uk.gov.gchq.gaffer.hdfs.operation.hander.job.factory.AbstractJobFactoryTest)
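
A small optional variation: because these tests already run on JUnit 5 (org.junit.jupiter.api.Test), the try/fail/catch block above could also be written with assertThrows, a static import from org.junit.jupiter.api.Assertions. The assertion is unchanged; this is just an alternative phrasing of the same check.

    // Equivalent JUnit 5 form of the try/fail/catch block above; requires
    // import static org.junit.jupiter.api.Assertions.assertThrows;
    final IllegalArgumentException e = assertThrows(IllegalArgumentException.class,
            () -> factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store));
    assertTrue(e.getMessage().contains("not a valid range"));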

Aggregations

Test (org.junit.jupiter.api.Test): 12
SplitStoreFromFile (uk.gov.gchq.gaffer.operation.impl.SplitStoreFromFile): 12
Context (uk.gov.gchq.gaffer.store.Context): 12
User (uk.gov.gchq.gaffer.user.User): 12
Operation (uk.gov.gchq.gaffer.operation.Operation): 8
OperationChain (uk.gov.gchq.gaffer.operation.OperationChain): 8
GetElements (uk.gov.gchq.gaffer.operation.impl.get.GetElements): 8
DiscardOutput (uk.gov.gchq.gaffer.operation.impl.DiscardOutput): 7
Count (uk.gov.gchq.gaffer.operation.impl.Count): 6
GetAdjacentIds (uk.gov.gchq.gaffer.operation.impl.get.GetAdjacentIds): 6
GetAllElements (uk.gov.gchq.gaffer.operation.impl.get.GetAllElements): 6
CountGroups (uk.gov.gchq.gaffer.operation.impl.CountGroups): 5
Validate (uk.gov.gchq.gaffer.operation.impl.Validate): 5
BufferedWriter (java.io.BufferedWriter): 4
FileWriter (java.io.FileWriter): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 4
Path (org.apache.hadoop.fs.Path): 4
JobConf (org.apache.hadoop.mapred.JobConf): 4
Job (org.apache.hadoop.mapreduce.Job): 4
AddElementsFromHdfs (uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs): 4