Search in sources :

Example 31 with BasicArguments

use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

the class MapReduceWithPartitionedTest method testPartitionedFileSetWithMR.

private void testPartitionedFileSetWithMR(boolean useCombineFileInputFormat) throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithPartitionedFileSet.class, new AppWithPartitionedFileSet.AppConfig(useCombineFileInputFormat));
    // write a value to the input table
    final Table table = datasetCache.getDataset(AppWithPartitionedFileSet.INPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.put(Bytes.toBytes("x"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
        }
    });
    // a partition key for the map/reduce output
    final PartitionKey keyX = PartitionKey.builder().addStringField("type", "x").addLongField("time", 150000L).build();
    // run the partition writer m/r with this output partition time
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    final PartitionedFileSet dataset = datasetCache.getDataset(PARTITIONED);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyX);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertTrue(path.contains("x"));
            Assert.assertTrue(path.contains("150000"));
        }
    });
    // delete the data in the input table and write a new row
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.delete(Bytes.toBytes("x"));
            table.put(Bytes.toBytes("y"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
        }
    });
    // a new partition key for the next map/reduce
    final PartitionKey keyY = PartitionKey.builder().addStringField("type", "y").addLongField("time", 200000L).build();
    // now run the m/r again with a new partition time, say 5 minutes later
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyY);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("y"));
            Assert.assertTrue(path.contains("200000"));
        }
    });
    // a partition filter that matches the outputs of both map/reduces
    PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
    // now run a map/reduce that reads all the partitions
    runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read both partitions - and written both x and y to row a
    final Table output = datasetCache.getDataset(AppWithPartitionedFileSet.OUTPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("a"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertEquals("2", row.getString("y"));
            Assert.assertEquals("{type=y, time=200000}", row.getString("y_key"));
        }
    });
    // a partition filter that matches the output key of the first map/reduce
    PartitionFilter filterX = PartitionFilter.builder().addValueCondition("type", "x").addRangeCondition("time", null, 160000L).build();
    // now run a map/reduce that reads a range of the partitions, namely the first one
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read the first partition only - and written only x to row b
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("b"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertNull(row.get("y"));
            Assert.assertNull(row.get("y_key"));
        }
    });
    // a partition filter that matches no key
    PartitionFilter filterMT = PartitionFilter.builder().addValueCondition("type", "nosuchthing").build();
    // now run a map/reduce that reads an empty range of partitions (the filter matches nothing)
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read no partitions - and written nothing to row n
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("n"));
            Assert.assertTrue(row.isEmpty());
        }
    });
}
Also used : Partition(co.cask.cdap.api.dataset.lib.Partition) Table(co.cask.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionFilter(co.cask.cdap.api.dataset.lib.PartitionFilter) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) Row(co.cask.cdap.api.dataset.table.Row)

Example 32 with BasicArguments

use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

the class WorkerProgramRunnerTest method startProgram.

private ProgramController startProgram(ApplicationWithPrograms app, Class<?> programClass) throws Throwable {
    final AtomicReference<Throwable> errorCause = new AtomicReference<>();
    final ProgramController controller = AppFabricTestHelper.submit(app, programClass.getName(), new BasicArguments(), TEMP_FOLDER_SUPPLIER);
    runningPrograms.add(controller);
    controller.addListener(new AbstractListener() {

        @Override
        public void error(Throwable cause) {
            errorCause.set(cause);
        }

        @Override
        public void killed() {
            errorCause.set(new RuntimeException("Killed"));
        }
    }, Threads.SAME_THREAD_EXECUTOR);
    Tasks.waitFor(ProgramController.State.ALIVE, new Callable<ProgramController.State>() {

        @Override
        public ProgramController.State call() throws Exception {
            Throwable t = errorCause.get();
            if (t != null) {
                Throwables.propagateIfInstanceOf(t, Exception.class);
                throw Throwables.propagate(t);
            }
            return controller.getState();
        }
    }, 30, TimeUnit.SECONDS);
    return controller;
}
Also used : ProgramController(co.cask.cdap.app.runtime.ProgramController) AtomicReference(java.util.concurrent.atomic.AtomicReference) AbstractListener(co.cask.cdap.internal.app.runtime.AbstractListener) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) IOException(java.io.IOException)

Example 33 with BasicArguments

use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

the class MapReduceWithMultipleInputsTest method testMapperOutputTypeChecking.

@Test
public void testMapperOutputTypeChecking() throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingInconsistentMappers.class);
    // the mapreduce with consistent mapper types will succeed
    Assert.assertTrue(runProgram(app, AppWithMapReduceUsingInconsistentMappers.MapReduceWithConsistentMapperTypes.class, new BasicArguments()));
    // the mapreduce with mapper classes of inconsistent output types will fail, whether the mappers are set through
    // CDAP APIs or also directly on the job
    Assert.assertFalse(runProgram(app, AppWithMapReduceUsingInconsistentMappers.MapReduceWithInconsistentMapperTypes.class, new BasicArguments()));
    Assert.assertFalse(runProgram(app, AppWithMapReduceUsingInconsistentMappers.MapReduceWithInconsistentMapperTypes2.class, new BasicArguments()));
}
Also used : ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) Test(org.junit.Test)

Example 34 with BasicArguments

use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

the class MapReduceWithMultipleInputsTest method testAddingMultipleInputsWithSameAlias.

@Test
public void testAddingMultipleInputsWithSameAlias() throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleInputs.class);
    // will fail because it configured two inputs with the same alias
    Assert.assertFalse(runProgram(app, AppWithMapReduceUsingMultipleInputs.InvalidMapReduce.class, new BasicArguments()));
}
Also used : ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) Test(org.junit.Test)

Example 35 with BasicArguments

use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

the class MapReduceWithMultipleOutputsTest method testMultipleOutputs.

@Test
public void testMultipleOutputs() throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleOutputs.class);
    final FileSet fileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.PURCHASES);
    Location inputFile = fileSet.getBaseLocation().append("inputFile");
    inputFile.createNew();
    PrintWriter writer = new PrintWriter(inputFile.getOutputStream());
    // the PURCHASES dataset consists of purchase records in the format: <customerId> <spend>
    writer.println("1 20");
    writer.println("1 65");
    writer.println("1 30");
    writer.println("2 5");
    writer.println("2 53");
    writer.println("2 45");
    writer.println("3 101");
    writer.close();
    // Using multiple outputs, this MapReduce send the records to a different path of the same dataset, depending
    // on the value in the data (large spend amounts will go to one file, while small will go to another file.
    runProgram(app, AppWithMapReduceUsingMultipleOutputs.SeparatePurchases.class, new BasicArguments());
    FileSet outputFileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.SEPARATED_PURCHASES);
    Assert.assertEquals(ImmutableList.of("1 20", "1 30", "2 5", "2 45"), readFromOutput(outputFileSet, "small_purchases"));
    Assert.assertEquals(ImmutableList.of("1 65", "2 53", "3 101"), readFromOutput(outputFileSet, "large_purchases"));
}
Also used : FileSet(co.cask.cdap.api.dataset.lib.FileSet) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) Location(org.apache.twill.filesystem.Location) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Aggregations

BasicArguments (co.cask.cdap.internal.app.runtime.BasicArguments)37 ApplicationWithPrograms (co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms)21 Test (org.junit.Test)18 SimpleProgramOptions (co.cask.cdap.internal.app.runtime.SimpleProgramOptions)15 ProgramDescriptor (co.cask.cdap.app.program.ProgramDescriptor)10 ProgramController (co.cask.cdap.app.runtime.ProgramController)8 IOException (java.io.IOException)8 TransactionAware (org.apache.tephra.TransactionAware)7 TransactionExecutor (org.apache.tephra.TransactionExecutor)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 File (java.io.File)6 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)5 HashMap (java.util.HashMap)5 Location (org.apache.twill.filesystem.Location)5 PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)4 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)4 ProgramOptions (co.cask.cdap.app.runtime.ProgramOptions)4 ProgramId (co.cask.cdap.proto.id.ProgramId)4 DiscoveryServiceClient (org.apache.twill.discovery.DiscoveryServiceClient)4 Program (co.cask.cdap.app.program.Program)3