Example 11 with BasicArguments

Use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

From class MapReduceWithMultipleInputsTest, method testSimpleJoin.

@Test
public void testSimpleJoin() throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleInputs.class);
    final FileSet fileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleInputs.PURCHASES);
    Location inputFile = fileSet.getBaseLocation().append("inputFile");
    inputFile.createNew();
    PrintWriter writer = new PrintWriter(inputFile.getOutputStream());
    // the PURCHASES dataset consists of purchase records in the format: <customerId> <spend>
    writer.println("1 20");
    writer.println("1 25");
    writer.println("1 30");
    writer.println("2 5");
    writer.close();
    // write some of the purchases to the stream
    writeToStream(AppWithMapReduceUsingMultipleInputs.PURCHASES, "2 13");
    writeToStream(AppWithMapReduceUsingMultipleInputs.PURCHASES, "3 60");
    FileSet fileSet2 = datasetCache.getDataset(AppWithMapReduceUsingMultipleInputs.CUSTOMERS);
    inputFile = fileSet2.getBaseLocation().append("inputFile");
    inputFile.createNew();
    // the CUSTOMERS dataset consists of records in the format: <customerId> <customerName>
    writer = new PrintWriter(inputFile.getOutputStream());
    writer.println("1 Bob");
    writer.println("2 Samuel");
    writer.println("3 Joe");
    writer.close();
    // Using multiple inputs, this MapReduce will join on the two above datasets to get aggregate results.
    // The records are expected to be in the form: <customerId> <customerName> <totalSpend>
    runProgram(app, AppWithMapReduceUsingMultipleInputs.ComputeSum.class, new BasicArguments());
    FileSet outputFileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleInputs.OUTPUT_DATASET);
    // will only be 1 part file, due to the small amount of data
    Location outputLocation = outputFileSet.getBaseLocation().append("output").append("part-r-00000");
    List<String> lines = CharStreams.readLines(CharStreams.newReaderSupplier(Locations.newInputSupplier(outputLocation), Charsets.UTF_8));
    Assert.assertEquals(ImmutableList.of("1 Bob 75", "2 Samuel 18", "3 Joe 60"), lines);
    // assert that the mapper was initialized and destroyed (this doesn't happen when using Hadoop's MultipleInputs).
    Assert.assertEquals("true", System.getProperty("mapper.initialized"));
    Assert.assertEquals("true", System.getProperty("mapper.destroyed"));
}
Also used : FileSet(co.cask.cdap.api.dataset.lib.FileSet) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) Location(org.apache.twill.filesystem.Location) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)
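
Since every example in this list funnels through the same class, a quick sketch of its two constructors may help. It assumes only what the examples themselves show: the no-arg form yields empty arguments (as passed to runProgram above), the Map-based form wraps the given entries, and Arguments exposes asMap() and getOption(). The key and value are hypothetical, and this is a fragment rather than a full class.

// Assumes the imports listed elsewhere in these examples:
// BasicArguments, Arguments, ImmutableMap, java.util.Map.
Map<String, String> args = ImmutableMap.of("customers.file", "inputFile"); // hypothetical entry
Arguments empty = new BasicArguments();          // what this test passes: no runtime arguments
Arguments populated = new BasicArguments(args);  // the Map-based constructor wraps the entries
System.out.println(populated.asMap());                     // {customers.file=inputFile}
System.out.println(populated.getOption("customers.file")); // inputFile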

Example 12 with BasicArguments

Use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

From class MapReduceWithPartitionedTest, method testTimePartitionedWithMR.

@Test
public void testTimePartitionedWithMR() throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithTimePartitionedFileSet.class);
    // write a value to the input table
    final Table table = datasetCache.getDataset(AppWithTimePartitionedFileSet.INPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.put(Bytes.toBytes("x"), AppWithTimePartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
        }
    });
    final long time = DATE_FORMAT.parse("1/15/15 11:15 am").getTime();
    final long time5 = time + TimeUnit.MINUTES.toMillis(5);
    // run the partition writer m/r with this output partition time
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, time);
    final ImmutableMap<String, String> assignedMetadata = ImmutableMap.of("region", "13", "data.source.name", "input", "data.source.type", "table");
    TimePartitionedFileSetArguments.setOutputPartitionMetadata(outputArgs, assignedMetadata);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    final TimePartitionedFileSet tpfs = datasetCache.getDataset(TIME_PARTITIONED);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) tpfs).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            TimePartitionDetail partition = tpfs.getPartitionByTime(time);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("2015-01-15/11-15"));
            Assert.assertEquals(assignedMetadata, partition.getMetadata().asMap());
        }
    });
    // delete the data in the input table and write a new row
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.delete(Bytes.toBytes("x"));
            table.put(Bytes.toBytes("y"), AppWithTimePartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
        }
    });
    // now run the m/r again with a new partition time, say 5 minutes later
    TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, time5);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, outputArgs));
    // make the mapreduce add the partition in destroy, to validate that this does not fail the job
    runtimeArguments.put(AppWithTimePartitionedFileSet.COMPAT_ADD_PARTITION, "true");
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the tpfs
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) tpfs).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = tpfs.getPartitionByTime(time5);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("2015-01-15/11-20"));
        }
    });
    // now run a map/reduce that reads all the partitions
    runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(5));
    TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time5 + TimeUnit.MINUTES.toMillis(5));
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "a");
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read both partitions - and written both x and y to row a
    final Table output = datasetCache.getDataset(AppWithTimePartitionedFileSet.OUTPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("a"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("2", row.getString("y"));
        }
    });
    // now run a map/reduce that reads a range of the partitions, namely the first one
    TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(5));
    TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time + TimeUnit.MINUTES.toMillis(2));
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "b");
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read the first partition only - and written only x to row b
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("b"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertNull(row.get("y"));
        }
    });
    // now run a map/reduce that reads no partitions (because the range matches nothing)
    TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(10));
    TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time - TimeUnit.MINUTES.toMillis(9));
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "n");
    Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read no partitions - and written nothing to row n
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("n"));
            Assert.assertTrue(row.isEmpty());
        }
    });
}
Also used : Partition(co.cask.cdap.api.dataset.lib.Partition) Table(co.cask.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) TransactionAware(org.apache.tephra.TransactionAware) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) TimePartitionDetail(co.cask.cdap.api.dataset.lib.TimePartitionDetail) Row(co.cask.cdap.api.dataset.table.Row) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) Test(org.junit.Test)
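
Each run above relies on RuntimeArguments.addScope to confine outputArgs (or inputArgs) to the TIME_PARTITIONED dataset only. A minimal sketch of the key transformation, assuming addScope qualifies each key as <scope>.<name>.<key>; the dataset name "myTpfs" and the key "cache.seconds" are hypothetical.

// Fragment; assumes RuntimeArguments, Scope, and ImmutableMap imports.
Map<String, String> raw = ImmutableMap.of("cache.seconds", "30");
Map<String, String> scoped = RuntimeArguments.addScope(Scope.DATASET, "myTpfs", raw);
// Expected shape (assumption): {dataset.myTpfs.cache.seconds=30}
System.out.println(scoped);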

Example 13 with BasicArguments

Use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

From class AbstractProgramRuntimeService, method updateProgramOptions.

/**
   * Updates the given {@link ProgramOptions} and returns a new instance.
   * It copies the {@link ProgramOptions}, then adds all entries returned by {@link #getExtraProgramOptions()},
   * followed by the {@link RunId} in the system arguments.
   *
   * Scope resolution is also performed on the user arguments at the cluster, application, and program scopes.
   *
   * @param programId the program id
   * @param options   the {@link ProgramOptions} in which the RunId is to be included
   * @param runId     the RunId to be included
   * @return a copy of the program options with the RunId included
   */
private ProgramOptions updateProgramOptions(ProgramId programId, ProgramOptions options, RunId runId) {
    // Build the system arguments
    ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
    builder.putAll(options.getArguments().asMap());
    builder.putAll(getExtraProgramOptions());
    builder.put(ProgramOptionConstants.RUN_ID, runId.getId());
    // Resolves the user arguments
    // First resolves at the cluster scope if the cluster.name is not empty
    String clusterName = options.getArguments().getOption(Constants.CLUSTER_NAME);
    Map<String, String> userArguments = options.getUserArguments().asMap();
    if (!Strings.isNullOrEmpty(clusterName)) {
        userArguments = RuntimeArguments.extractScope(CLUSTER_SCOPE, clusterName, userArguments);
    }
    // Then resolves at the application scope
    userArguments = RuntimeArguments.extractScope(APPLICATION_SCOPE, programId.getApplication(), userArguments);
    // Then resolves at the program level
    userArguments = RuntimeArguments.extractScope(programId.getType().getScope(), programId.getProgram(), userArguments);
    return new SimpleProgramOptions(options.getName(), new BasicArguments(builder.build()), new BasicArguments(userArguments), options.isDebug());
}
Also used : SimpleProgramOptions(co.cask.cdap.internal.app.runtime.SimpleProgramOptions) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) ImmutableMap(com.google.common.collect.ImmutableMap)
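
The three extractScope calls form a precedence chain: cluster first (when cluster.name is set), then application, then program, so more specific keys shadow general ones. A minimal sketch of one key passing through the last two stages, assuming the scope tokens "app" and "mapreduce"; the application name, program name, key, and values are all hypothetical.

// Fragment; assumes RuntimeArguments and ImmutableMap imports.
Map<String, String> userArguments = ImmutableMap.of(
    "memory.mb", "512",
    "app.PurchaseApp.memory.mb", "1024",
    "mapreduce.ComputeSum.memory.mb", "2048");
// Application scope: app.PurchaseApp.memory.mb shadows the plain key...
userArguments = RuntimeArguments.extractScope("app", "PurchaseApp", userArguments);
// ...program scope: mapreduce.ComputeSum.memory.mb wins for this program.
userArguments = RuntimeArguments.extractScope("mapreduce", "ComputeSum", userArguments);
// Expected (assumption): memory.mb -> 2048
System.out.println(userArguments.get("memory.mb"));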

Example 14 with BasicArguments

Use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

From class AbstractProgramRuntimeService, method createPluginSnapshot.

/**
   * Returns a copy of the {@link ProgramOptions} with the locations of plugin artifacts included.
   * @param options   the {@link ProgramOptions} in which the locations of plugin artifacts need to be included
   * @param programId the id of the program
   * @param tempDir   the temporary directory in which to create the plugin artifact snapshot
   * @param appSpec   the program's application specification
   * @return a copy of the program options with the locations of plugin artifacts included
   */
private ProgramOptions createPluginSnapshot(ProgramOptions options, ProgramId programId, File tempDir, @Nullable ApplicationSpecification appSpec) throws Exception {
    // appSpec is null in a unit test
    if (appSpec == null || appSpec.getPlugins().isEmpty()) {
        return options;
    }
    Set<String> files = Sets.newHashSet();
    ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
    builder.putAll(options.getArguments().asMap());
    for (Map.Entry<String, Plugin> pluginEntry : appSpec.getPlugins().entrySet()) {
        Plugin plugin = pluginEntry.getValue();
        File destFile = new File(tempDir, Artifacts.getFileName(plugin.getArtifactId()));
        // Skip if the file has already been copied.
        if (!files.add(destFile.getName())) {
            continue;
        }
        try {
            ArtifactId artifactId = Artifacts.toArtifactId(programId.getNamespaceId(), plugin.getArtifactId());
            copyArtifact(artifactId, artifactRepository.getArtifact(artifactId.toId()), destFile);
        } catch (ArtifactNotFoundException e) {
            throw new IllegalArgumentException(String.format("Artifact %s could not be found", plugin.getArtifactId()), e);
        }
    }
    LOG.debug("Plugin artifacts of {} copied to {}", programId, tempDir.getAbsolutePath());
    builder.put(ProgramOptionConstants.PLUGIN_DIR, tempDir.getAbsolutePath());
    return new SimpleProgramOptions(options.getName(), new BasicArguments(builder.build()), options.getUserArguments(), options.isDebug());
}
Also used : ArtifactId(co.cask.cdap.proto.id.ArtifactId) ImmutableMap(com.google.common.collect.ImmutableMap) SimpleProgramOptions(co.cask.cdap.internal.app.runtime.SimpleProgramOptions) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) File(java.io.File) ArtifactNotFoundException(co.cask.cdap.common.ArtifactNotFoundException) Plugin(co.cask.cdap.api.plugin.Plugin)
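
For context, the PLUGIN_DIR entry written above is what a program runner can later read back to locate the snapshot. A rough sketch of that consuming side, reusing only identifiers that appear in this example (getOption, ProgramOptionConstants.PLUGIN_DIR, LOG); the null checks are defensive assumptions, not taken from CDAP's actual consumer.

// Fragment; assumes the options produced by createPluginSnapshot above.
String pluginDirPath = options.getArguments().getOption(ProgramOptionConstants.PLUGIN_DIR);
if (pluginDirPath != null) {
    // Each artifact was copied here under Artifacts.getFileName(plugin.getArtifactId()).
    File[] artifactFiles = new File(pluginDirPath).listFiles();
    if (artifactFiles != null) {
        for (File artifactFile : artifactFiles) {
            LOG.debug("Found plugin artifact {}", artifactFile.getName());
        }
    }
}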

Example 15 with BasicArguments

Use of co.cask.cdap.internal.app.runtime.BasicArguments in project cdap by caskdata.

From class AbstractProgramTwillRunnable, method createProgramOptions.

/**
   * Creates program options. It contains program and user arguments as passed from the distributed
   * program runner. Extra program arguments are inserted based on the environment information
   * (e.g. host, instance id). All configs available through the TwillRunnable are also exposed
   * through program arguments.
   */
private ProgramOptions createProgramOptions(CommandLine cmdLine, TwillContext context, Map<String, String> configs) {
    ProgramOptions original = GSON.fromJson(cmdLine.getOptionValue(RunnableOptions.PROGRAM_OPTIONS), ProgramOptions.class);
    // Overwrite them with environmental information
    Map<String, String> arguments = Maps.newHashMap(original.getArguments().asMap());
    arguments.put(ProgramOptionConstants.INSTANCE_ID, Integer.toString(context.getInstanceId()));
    arguments.put(ProgramOptionConstants.INSTANCES, Integer.toString(context.getInstanceCount()));
    arguments.put(ProgramOptionConstants.TWILL_RUN_ID, context.getApplicationRunId().getId());
    arguments.put(ProgramOptionConstants.HOST, context.getHost().getCanonicalHostName());
    arguments.putAll(configs);
    return new SimpleProgramOptions(context.getSpecification().getName(), new BasicArguments(arguments), resolveScope(original.getUserArguments()), original.isDebug());
}
Also used : SimpleProgramOptions(co.cask.cdap.internal.app.runtime.SimpleProgramOptions) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) SimpleProgramOptions(co.cask.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(co.cask.cdap.app.runtime.ProgramOptions)
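
The other half of this hand-off happens in the distributed program runner, which serializes the submitted ProgramOptions to JSON before launch. A minimal sketch of that serialization, assuming only plain Gson; deserializing back through the ProgramOptions interface (as GSON.fromJson above does) additionally requires the type adapters registered on that GSON instance, which are not shown here. The program name and argument entries are hypothetical.

// Fragment; assumes Gson, SimpleProgramOptions, BasicArguments, and ImmutableMap imports.
ProgramOptions submitted = new SimpleProgramOptions(
    "MyService",
    new BasicArguments(ImmutableMap.of(ProgramOptionConstants.RUN_ID, "run-1")), // system args
    new BasicArguments(ImmutableMap.of("threshold", "10")),                      // user args
    false);
// Serialized and passed as the RunnableOptions.PROGRAM_OPTIONS command-line option.
String json = new Gson().toJson(submitted);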

Aggregations

BasicArguments (co.cask.cdap.internal.app.runtime.BasicArguments): 30 usages
ApplicationWithPrograms (co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 18 usages
Test (org.junit.Test): 17 usages
SimpleProgramOptions (co.cask.cdap.internal.app.runtime.SimpleProgramOptions): 11 usages
ProgramDescriptor (co.cask.cdap.app.program.ProgramDescriptor): 10 usages
ProgramController (co.cask.cdap.app.runtime.ProgramController): 9 usages
File (java.io.File): 5 usages
TransactionExecutor (org.apache.tephra.TransactionExecutor): 5 usages
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 4 usages
AbstractListener (co.cask.cdap.internal.app.runtime.AbstractListener): 4 usages
IOException (java.io.IOException): 4 usages
TransactionAware (org.apache.tephra.TransactionAware): 4 usages
Location (org.apache.twill.filesystem.Location): 4 usages
FileSet (co.cask.cdap.api.dataset.lib.FileSet): 3 usages
Arguments (co.cask.cdap.app.runtime.Arguments): 3 usages
ProgramOptions (co.cask.cdap.app.runtime.ProgramOptions): 3 usages
RandomEndpointStrategy (co.cask.cdap.common.discovery.RandomEndpointStrategy): 3 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 3 usages
Discoverable (org.apache.twill.discovery.Discoverable): 3 usages
DiscoveryServiceClient (org.apache.twill.discovery.DiscoveryServiceClient): 3 usages