
Example 6 with BasicArguments

Use of io.cdap.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

The class MapReduceProgramRunnerTest, method testMapreduceWithFile.

private void testMapreduceWithFile(String inputDatasetName, String inputPaths, String outputDatasetName, String outputPath, Class appClass, Class mrClass, Map<String, String> extraRuntimeArgs, @Nullable final String counterTableName, @Nullable final String outputSeparator) throws Exception {
    final ApplicationWithPrograms app = deployApp(appClass, new AppWithMapReduceUsingFileSet.AppConfig(inputDatasetName, outputDatasetName));
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    FileSetArguments.setInputPaths(inputArgs, inputPaths);
    FileSetArguments.setOutputPath(outputArgs, outputPath);
    if (outputSeparator != null) {
        // use the requested separator for the TextOutputFormat output
        outputArgs.put(FileSetProperties.OUTPUT_PROPERTIES_PREFIX + TextOutputFormat.SEPERATOR, outputSeparator);
    }
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, inputDatasetName, inputArgs));
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, outputDatasetName, outputArgs));
    if (extraRuntimeArgs != null) {
        runtimeArguments.putAll(extraRuntimeArgs);
    }
    // clear the counters in case a previous test case left behind some values
    if (counterTableName != null) {
        Transactions.execute(datasetCache.newTransactionContext(), "countersVerify", () -> {
            KeyValueTable counters = datasetCache.getDataset(counterTableName);
            counters.delete(AppWithMapReduceUsingRuntimeDatasets.INPUT_RECORDS);
            counters.delete(AppWithMapReduceUsingRuntimeDatasets.REDUCE_KEYS);
        });
    }
    // write a handful of numbers to a file; compute their sum, too.
    final long[] values = { 15L, 17L, 7L, 3L };
    final FileSet input = datasetCache.getDataset(inputDatasetName, inputArgs);
    long sum = 0L, count = 1;
    long inputRecords = 0;
    for (Location inputLocation : input.getInputLocations()) {
        final PrintWriter writer = new PrintWriter(inputLocation.getOutputStream());
        for (long value : values) {
            value *= count;
            writer.println(value);
            sum += value;
            inputRecords++;
        }
        writer.close();
        count++;
    }
    runProgram(app, mrClass, new BasicArguments(runtimeArguments));
    // output location in file system is a directory that contains a part file, a _SUCCESS file, and checksums
    // (.<filename>.crc) for these files. Find the actual part file. Its name begins with "part". In this case,
    // there should be only one part file (with this small data, we have a single reducer).
    final FileSet results = datasetCache.getDataset(outputDatasetName, outputArgs);
    Location resultLocation = results.getOutputLocation();
    if (resultLocation.isDirectory()) {
        for (Location child : resultLocation.list()) {
            if (!child.isDirectory() && child.getName().startsWith("part")) {
                resultLocation = child;
                break;
            }
        }
    }
    Assert.assertFalse(resultLocation.isDirectory());
    // read output and verify result
    String line = CharStreams.readFirstLine(CharStreams.newReaderSupplier(Locations.newInputSupplier(resultLocation), Charsets.UTF_8));
    Assert.assertNotNull(line);
    String[] fields = line.split(outputSeparator == null ? ":" : outputSeparator);
    Assert.assertEquals(2, fields.length);
    Assert.assertEquals(AppWithMapReduceUsingFileSet.FileMapper.ONLY_KEY, fields[0]);
    Assert.assertEquals(sum, Long.parseLong(fields[1]));
    if (counterTableName != null) {
        final long totalInputRecords = inputRecords;
        Transactions.execute(datasetCache.newTransactionContext(), "countersVerify", () -> {
            KeyValueTable counters = datasetCache.getDataset(counterTableName);
            Assert.assertEquals(totalInputRecords, counters.incrementAndGet(AppWithMapReduceUsingRuntimeDatasets.INPUT_RECORDS, 0L));
            Assert.assertEquals(1L, counters.incrementAndGet(AppWithMapReduceUsingRuntimeDatasets.REDUCE_KEYS, 0L));
        });
    }
}
Also used : FileSet(io.cdap.cdap.api.dataset.lib.FileSet) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Location(org.apache.twill.filesystem.Location) PrintWriter(java.io.PrintWriter)
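
The argument plumbing above is easier to see in isolation. Below is a minimal, self-contained sketch (not part of the test) of how the dataset-scoped arguments end up wrapped in a BasicArguments instance; the dataset names and paths are illustrative, and the io.cdap.cdap.api.common package for RuntimeArguments and Scope is assumed from the usual CDAP API layout.

import java.util.HashMap;
import java.util.Map;

import io.cdap.cdap.api.common.RuntimeArguments;
import io.cdap.cdap.api.common.Scope;
import io.cdap.cdap.api.dataset.lib.FileSetArguments;
import io.cdap.cdap.internal.app.runtime.BasicArguments;

public class ScopedArgumentsSketch {
    public static void main(String[] args) {
        Map<String, String> inputArgs = new HashMap<>();
        Map<String, String> outputArgs = new HashMap<>();
        // tell the "input" FileSet which paths to read and the "output" FileSet where to write
        FileSetArguments.setInputPaths(inputArgs, "abc, xyz");
        FileSetArguments.setOutputPath(outputArgs, "a001");
        // addScope prefixes each key with its dataset scope (typically dataset.<name>.<key>),
        // so the arguments only apply to the named dataset
        Map<String, String> runtimeArguments = new HashMap<>();
        runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "input", inputArgs));
        runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "output", outputArgs));
        // BasicArguments wraps the map; this is what the test hands to runProgram(app, mrClass, ...)
        BasicArguments arguments = new BasicArguments(runtimeArguments);
        runtimeArguments.forEach((key, value) -> System.out.println(key + " = " + value));
    }
}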

Example 7 with BasicArguments

Use of io.cdap.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

The class MapReduceWithMultipleOutputsTest, method testMultipleOutputs.

@Test
public void testMultipleOutputs() throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleOutputs.class);
    final FileSet fileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.PURCHASES);
    Location inputFile = fileSet.getBaseLocation().append("inputFile");
    inputFile.createNew();
    PrintWriter writer = new PrintWriter(inputFile.getOutputStream());
    // the PURCHASES dataset consists of purchase records in the format: <customerId> <spend>
    writer.println("1 20");
    writer.println("1 65");
    writer.println("1 30");
    writer.println("2 5");
    writer.println("2 53");
    writer.println("2 45");
    writer.println("3 101");
    writer.close();
    // Using multiple outputs, this MapReduce sends the records to different paths of the same dataset, depending
    // on the value in the data (large spend amounts go to one file, while small ones go to another file).
    runProgram(app, AppWithMapReduceUsingMultipleOutputs.SeparatePurchases.class, new BasicArguments());
    FileSet outputFileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.SEPARATED_PURCHASES);
    Assert.assertEquals(ImmutableList.of("1 20", "1 30", "2 5", "2 45"), readFromOutput(outputFileSet, "small_purchases"));
    Assert.assertEquals(ImmutableList.of("1 65", "2 53", "3 101"), readFromOutput(outputFileSet, "large_purchases"));
}
Also used : FileSet(io.cdap.cdap.api.dataset.lib.FileSet) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Location(org.apache.twill.filesystem.Location) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)
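
The readFromOutput helper is defined elsewhere in MapReduceWithMultipleOutputsTest and is not shown here. A rough, hypothetical sketch of what such a helper could look like, assuming only the FileSet.getLocation and Twill Location APIs used above; it is an illustration, not the test's actual implementation.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import io.cdap.cdap.api.dataset.lib.FileSet;
import org.apache.twill.filesystem.Location;

public class ReadFromOutputSketch {
    // hypothetical stand-in for the test's readFromOutput helper: collect all lines from
    // the part files under a sub-directory of the output FileSet
    static List<String> readFromOutput(FileSet fileSet, String relativePath) throws IOException {
        List<String> lines = new ArrayList<>();
        for (Location child : fileSet.getLocation(relativePath).list()) {
            // skip _SUCCESS markers and .crc checksum files
            if (!child.isDirectory() && child.getName().startsWith("part")) {
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(child.getInputStream(), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        lines.add(line);
                    }
                }
            }
        }
        return lines;
    }
}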

Example 8 with BasicArguments

Use of io.cdap.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

The class MapReduceWithMultipleOutputsTest, method testAddingMultipleOutputsWithSameAlias.

@Test
public void testAddingMultipleOutputsWithSameAlias() throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleOutputs.class);
    // will fail because it configured two outputs with the same alias
    Assert.assertFalse(runProgram(app, AppWithMapReduceUsingMultipleOutputs.InvalidMapReduce.class, new BasicArguments()));
}
Also used : ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Test(org.junit.Test)
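
For context, the kind of configuration that makes InvalidMapReduce fail registers two outputs under one alias. The sketch below is hypothetical (the dataset names and the alias are made up) and assumes the io.cdap.cdap.api.data.batch.Output builder with its alias(...) method; it is not the app's actual code.

import io.cdap.cdap.api.data.batch.Output;
import io.cdap.cdap.api.mapreduce.AbstractMapReduce;
import io.cdap.cdap.api.mapreduce.MapReduceContext;

public class DuplicateAliasMapReduceSketch extends AbstractMapReduce {
    @Override
    public void initialize() throws Exception {
        MapReduceContext context = getContext();
        // both outputs are registered under the alias "purchases"; the duplicate alias is
        // what makes a run like InvalidMapReduce fail before the job starts
        context.addOutput(Output.ofDataset("small_purchases").alias("purchases"));
        context.addOutput(Output.ofDataset("large_purchases").alias("purchases"));
    }
}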

Example 9 with BasicArguments

Use of io.cdap.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

The class DefaultRuntimeJobTest, method testInjector.

@Test
public void testInjector() throws Exception {
    CConfiguration cConf = CConfiguration.create();
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, TEMP_FOLDER.newFolder().toString());
    LocationFactory locationFactory = new LocalLocationFactory(TEMP_FOLDER.newFile());
    DefaultRuntimeJob defaultRuntimeJob = new DefaultRuntimeJob();
    Arguments systemArgs = new BasicArguments(Collections.singletonMap(SystemArguments.PROFILE_NAME, "test"));
    Node node = new Node("test", Node.Type.MASTER, "127.0.0.1", System.currentTimeMillis(), Collections.emptyMap());
    Cluster cluster = new Cluster("test", ClusterStatus.RUNNING, Collections.singleton(node), Collections.emptyMap());
    ProgramRunId programRunId = NamespaceId.DEFAULT.app("app").workflow("workflow").run(RunIds.generate());
    SimpleProgramOptions programOpts = new SimpleProgramOptions(programRunId.getParent(), systemArgs, new BasicArguments());
    Injector injector = Guice.createInjector(defaultRuntimeJob.createModules(new RuntimeJobEnvironment() {

        @Override
        public LocationFactory getLocationFactory() {
            return locationFactory;
        }

        @Override
        public TwillRunner getTwillRunner() {
            return new NoopTwillRunnerService();
        }

        @Override
        public Map<String, String> getProperties() {
            return Collections.emptyMap();
        }
    }, cConf, programRunId, programOpts));
    injector.getInstance(LogAppenderInitializer.class);
    defaultRuntimeJob.createCoreServices(injector, systemArgs, cluster);
}
Also used : Node(io.cdap.cdap.runtime.spi.provisioner.Node) Arguments(io.cdap.cdap.app.runtime.Arguments) SystemArguments(io.cdap.cdap.internal.app.runtime.SystemArguments) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) LocalLocationFactory(org.apache.twill.filesystem.LocalLocationFactory) LocationFactory(org.apache.twill.filesystem.LocationFactory) NoopTwillRunnerService(io.cdap.cdap.common.twill.NoopTwillRunnerService) Injector(com.google.inject.Injector) RuntimeJobEnvironment(io.cdap.cdap.runtime.spi.runtimejob.RuntimeJobEnvironment) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) Test(org.junit.Test)
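
A side note on how the arguments are split here: SimpleProgramOptions carries system arguments (platform-facing settings such as SystemArguments.PROFILE_NAME) separately from user arguments. The sketch below reuses only the constructors seen above; reading the values back through getArguments(), getUserArguments(), and getOption()/hasOption() is assumed from the ProgramOptions and Arguments interfaces.

import java.util.Collections;

import io.cdap.cdap.app.runtime.Arguments;
import io.cdap.cdap.internal.app.runtime.BasicArguments;
import io.cdap.cdap.internal.app.runtime.SimpleProgramOptions;
import io.cdap.cdap.internal.app.runtime.SystemArguments;
import io.cdap.cdap.proto.id.NamespaceId;
import io.cdap.cdap.proto.id.ProgramId;

public class ProgramOptionsSketch {
    public static void main(String[] args) {
        ProgramId programId = NamespaceId.DEFAULT.app("app").workflow("workflow");
        // system arguments are consumed by the platform (here, the compute profile to run against)
        Arguments systemArgs =
            new BasicArguments(Collections.singletonMap(SystemArguments.PROFILE_NAME, "test"));
        // user arguments are what the program itself sees as runtime arguments (empty here)
        Arguments userArgs = new BasicArguments();
        SimpleProgramOptions programOpts = new SimpleProgramOptions(programId, systemArgs, userArgs);
        // getArguments() is assumed to return the system arguments, getUserArguments() the user ones
        System.out.println(programOpts.getArguments().getOption(SystemArguments.PROFILE_NAME));
        System.out.println(programOpts.getUserArguments().hasOption(SystemArguments.PROFILE_NAME));
    }
}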

Example 10 with BasicArguments

Use of io.cdap.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

The class MapReduceWithPartitionedTest, method testPartitionedFileSetWithMR.

private void testPartitionedFileSetWithMR(boolean useCombineFileInputFormat) throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithPartitionedFileSet.class, new AppWithPartitionedFileSet.AppConfig(useCombineFileInputFormat));
    // write a value to the input table
    final Table table = datasetCache.getDataset(AppWithPartitionedFileSet.INPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.put(Bytes.toBytes("x"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
        }
    });
    // a partition key for the map/reduce output
    final PartitionKey keyX = PartitionKey.builder().addStringField("type", "x").addLongField("time", 150000L).build();
    // run the partition writer m/r with this output partition time
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    final PartitionedFileSet dataset = datasetCache.getDataset(PARTITIONED);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyX);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertTrue(path.contains("x"));
            Assert.assertTrue(path.contains("150000"));
        }
    });
    // delete the data in the input table and write a new row
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.delete(Bytes.toBytes("x"));
            table.put(Bytes.toBytes("y"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
        }
    });
    // a new partition key for the next map/reduce
    final PartitionKey keyY = PartitionKey.builder().addStringField("type", "y").addLongField("time", 200000L).build();
    // now run the m/r again with a new partition time, say 5 minutes later
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyY);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("y"));
            Assert.assertTrue(path.contains("200000"));
        }
    });
    // a partition filter that matches the outputs of both map/reduces
    PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
    // now run a map/reduce that reads all the partitions
    runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read both partitions - and written both x and y to row a
    final Table output = datasetCache.getDataset(AppWithPartitionedFileSet.OUTPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("a"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertEquals("2", row.getString("y"));
            Assert.assertEquals("{type=y, time=200000}", row.getString("y_key"));
        }
    });
    // a partition filter that matches the output key of the first map/reduce
    PartitionFilter filterX = PartitionFilter.builder().addValueCondition("type", "x").addRangeCondition("time", null, 160000L).build();
    // now run a map/reduce that reads a range of the partitions, namely the first one
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read the first partition only - and written only x to row b
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("b"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertNull(row.get("y"));
            Assert.assertNull(row.get("y_key"));
        }
    });
    // a partition filter that matches no key
    PartitionFilter filterMT = PartitionFilter.builder().addValueCondition("type", "nosuchthing").build();
    // now run a map/reduce that reads an empty range of partitions (the filter matches nothing)
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read no partitions - and written nothing to row n
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("n"));
            Assert.assertTrue(row.isEmpty());
        }
    });
}
Also used : Partition(io.cdap.cdap.api.dataset.lib.Partition) Table(io.cdap.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Row(io.cdap.cdap.api.dataset.table.Row)
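
Much of this test hinges on which partition keys each PartitionFilter matches. The sketch below shows the same builders in isolation; the PartitionFilter.match(PartitionKey) call used to illustrate the outcome is an assumption of this sketch rather than something the test invokes directly.

import io.cdap.cdap.api.dataset.lib.PartitionFilter;
import io.cdap.cdap.api.dataset.lib.PartitionKey;

public class PartitionFilterSketch {
    public static void main(String[] args) {
        // the two partition keys written by the PartitionWriter runs above
        PartitionKey keyX = PartitionKey.builder()
            .addStringField("type", "x").addLongField("time", 150000L).build();
        PartitionKey keyY = PartitionKey.builder()
            .addStringField("type", "y").addLongField("time", 200000L).build();
        // matches both keys: "type" in the range ["x", "z")
        PartitionFilter filterXY = PartitionFilter.builder()
            .addRangeCondition("type", "x", "z").build();
        // matches only keyX: type must equal "x" and time must be below 160000
        PartitionFilter filterX = PartitionFilter.builder()
            .addValueCondition("type", "x").addRangeCondition("time", null, 160000L).build();
        System.out.println(filterXY.match(keyX) + " " + filterXY.match(keyY));  // true true
        System.out.println(filterX.match(keyX) + " " + filterX.match(keyY));    // true false
    }
}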

Aggregations

BasicArguments (io.cdap.cdap.internal.app.runtime.BasicArguments): 90 usages
SimpleProgramOptions (io.cdap.cdap.internal.app.runtime.SimpleProgramOptions): 54 usages
Test (org.junit.Test): 44 usages
ProgramOptions (io.cdap.cdap.app.runtime.ProgramOptions): 36 usages
ProgramDescriptor (io.cdap.cdap.app.program.ProgramDescriptor): 32 usages
ApplicationWithPrograms (io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 32 usages
HashMap (java.util.HashMap): 32 usages
ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId): 28 usages
ProgramId (io.cdap.cdap.proto.id.ProgramId): 24 usages
CConfiguration (io.cdap.cdap.common.conf.CConfiguration): 22 usages
SystemArguments (io.cdap.cdap.internal.app.runtime.SystemArguments): 20 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 18 usages
Map (java.util.Map): 18 usages
ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification): 16 usages
ArtifactId (io.cdap.cdap.api.artifact.ArtifactId): 16 usages
IOException (java.io.IOException): 16 usages
Injector (com.google.inject.Injector): 14 usages
Collections (java.util.Collections): 14 usages
ProgramController (io.cdap.cdap.app.runtime.ProgramController): 12 usages
RunIds (io.cdap.cdap.common.app.RunIds): 12 usages