Search in sources :

Example 1 with ProvidedOutput

use of io.cdap.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput in project cdap by caskdata.

From the class BasicMapReduceContext, method addOutput.

/**
 * Registers an {@link Output} for this MapReduce job under its alias.
 *
 * @throws IllegalArgumentException if a non-system program tries to use an output in the
 *     system namespace, if an output with the same alias was already added, if the output
 *     wraps a {@link DatasetOutputCommitter} as a plain OutputFormatProvider, or if the
 *     output class is unknown (i.e. user-defined)
 */
@Override
public void addOutput(Output output) {
    // Accessing an output in the system namespace is only allowed for programs that
    // themselves run in the system namespace.
    if (output.getNamespace() != null && output.getNamespace().equals(NamespaceId.SYSTEM.getNamespace()) && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) {
        // trying to access system namespace from a program outside system namespace is not allowed
        throw new IllegalArgumentException(String.format("Accessing Output %s in system namespace " + "is not allowed from the namespace %s", output.getName(), getProgram().getNamespaceId()));
    }
    String alias = output.getAlias();
    if (this.outputs.containsKey(alias)) {
        throw new IllegalArgumentException("Output already configured: " + alias);
    }
    ProvidedOutput providedOutput;
    if (output instanceof Output.DatasetOutput) {
        providedOutput = Outputs.transform((Output.DatasetOutput) output, this);
    } else if (output instanceof Output.OutputFormatProviderOutput) {
        OutputFormatProvider outputFormatProvider = ((Output.OutputFormatProviderOutput) output).getOutputFormatProvider();
        if (outputFormatProvider instanceof DatasetOutputCommitter) {
            // be able to call its methods in MainOutputCommitter. It needs to be a DatasetOutput.
            throw new IllegalArgumentException("Cannot add a DatasetOutputCommitter as an OutputFormatProviderOutput. " + "Add the output as a DatasetOutput.");
        }
        providedOutput = new ProvidedOutput(output, outputFormatProvider);
    } else {
        // getCanonicalName() returns null for anonymous/local classes, so fall back to
        // getName() to avoid a NullPointerException for user-defined anonymous Outputs.
        String outputClassName = output.getClass().getCanonicalName();
        if (outputClassName == null) {
            outputClassName = output.getClass().getName();
        }
        if (outputClassName.startsWith(CDAP_PACKAGE_PREFIX)) {
            // Skip unsupported outputs from within CDAP packages.
            // This is used to ignore unsupported outputs in MapReduce (such as the SQL Engine Output for Spark).
            LOG.info("Unsupported output in MapReduce: {}", outputClassName);
            return;
        }
        // shouldn't happen unless user defines their own Output class
        throw new IllegalArgumentException(String.format("Output %s has unknown output class %s", output.getName(), outputClassName));
    }
    this.outputs.put(alias, providedOutput);
}
Also used : ProvidedOutput(io.cdap.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput) Output(io.cdap.cdap.api.data.batch.Output) DatasetOutputCommitter(io.cdap.cdap.api.data.batch.DatasetOutputCommitter) OutputFormatProvider(io.cdap.cdap.api.data.batch.OutputFormatProvider)

Example 2 with ProvidedOutput

use of io.cdap.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput in project cdap by caskdata.

From the class MapReduceContextConfig, method setOutputs.

/**
 * Persists the job's outputs into the Hadoop configuration under {@code HCONF_ATTR_OUTPUTS}.
 * Only the original {@link Output.DatasetOutput} objects need to be serialized, not the
 * entire {@link ProvidedOutput} wrappers.
 */
private void setOutputs(List<ProvidedOutput> providedOutputs) {
    List<Output.DatasetOutput> serializableOutputs = new ArrayList<>();
    for (ProvidedOutput provided : providedOutputs) {
        Output original = provided.getOutput();
        // only dataset outputs are written into the configuration
        if (!(original instanceof Output.DatasetOutput)) {
            continue;
        }
        serializableOutputs.add((Output.DatasetOutput) original);
    }
    hConf.set(HCONF_ATTR_OUTPUTS, GSON.toJson(serializableOutputs));
}
Also used : ProvidedOutput(io.cdap.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput) Output(io.cdap.cdap.api.data.batch.Output) ArrayList(java.util.ArrayList)

Example 3 with ProvidedOutput

use of io.cdap.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput in project cdap by caskdata.

From the class MapReduceRuntimeService, method fixOutputPermissions.

/**
 * Reconciles the Hadoop umask property ({@code HADOOP_UMASK_PROPERTY}) between the job
 * configuration and the per-output configurations, since the umask can only take effect
 * at the job level. Three cases are handled: a programmatic job-level umask wins over all
 * per-output umasks; if all outputs agree on one umask it is promoted to the job
 * configuration; otherwise conflicting per-output umasks fall back to the job default.
 * In the first and third cases the umask property is stripped from the affected outputs'
 * configurations, replacing entries in the given (mutable) {@code outputs} list in place.
 *
 * @param job the job whose configuration is inspected and possibly updated
 * @param outputs the provided outputs; entries may be replaced in place
 */
private void fixOutputPermissions(JobContext job, List<ProvidedOutput> outputs) {
    Configuration jobconf = job.getConfiguration();
    // aliases of outputs that configure a umask, and the distinct umask values seen
    Set<String> outputsWithUmask = new HashSet<>();
    Set<String> outputUmasks = new HashSet<>();
    for (ProvidedOutput entry : outputs) {
        String umask = entry.getOutputFormatConfiguration().get(HADOOP_UMASK_PROPERTY);
        if (umask != null) {
            outputsWithUmask.add(entry.getOutput().getAlias());
            outputUmasks.add(umask);
        }
    }
    boolean allOutputsHaveUmask = outputsWithUmask.size() == outputs.size();
    // a single distinct value means every umask-configuring output agrees
    boolean allOutputsAgree = outputUmasks.size() == 1;
    boolean jobConfHasUmask = isProgrammaticConfig(jobconf, HADOOP_UMASK_PROPERTY);
    String jobConfUmask = jobconf.get(HADOOP_UMASK_PROPERTY);
    boolean mustFixUmasks = false;
    if (jobConfHasUmask) {
        // case 1: job conf has a programmatic umask. It prevails.
        mustFixUmasks = !outputsWithUmask.isEmpty();
        if (mustFixUmasks) {
            LOG.info("Overriding permissions of outputs {} because a umask of {} was set programmatically in the job " + "configuration.", outputsWithUmask, jobConfUmask);
        }
    } else if (allOutputsHaveUmask && allOutputsAgree) {
        // case 2: no programmatic umask in job conf, all outputs want the same umask: set it in job conf
        String umaskToUse = outputUmasks.iterator().next();
        jobconf.set(HADOOP_UMASK_PROPERTY, umaskToUse);
        LOG.debug("Setting umask of {} in job configuration because all outputs {} agree on it.", umaskToUse, outputsWithUmask);
    } else {
        // case 3: some outputs configure a umask, but not all of them, or not all the same: use job conf default
        mustFixUmasks = !outputsWithUmask.isEmpty();
        if (mustFixUmasks) {
            LOG.warn("Overriding permissions of outputs {} because they configure different permissions. Falling back " + "to default umask of {} in job configuration.", outputsWithUmask, jobConfUmask);
        }
    }
    // fix all output configurations that have a umask by removing that property from their configs
    if (mustFixUmasks) {
        // indexed loop so the list entry can be replaced in place with a rebuilt ProvidedOutput
        for (int i = 0; i < outputs.size(); i++) {
            ProvidedOutput output = outputs.get(i);
            if (outputsWithUmask.contains(output.getOutput().getAlias())) {
                // copy the config before mutating; the original map may be unmodifiable or shared
                Map<String, String> outputConfig = new HashMap<>(output.getOutputFormatConfiguration());
                outputConfig.remove(HADOOP_UMASK_PROPERTY);
                outputs.set(i, new ProvidedOutput(output.getOutput(), output.getOutputFormatProvider(), output.getOutputFormatClassName(), outputConfig));
            }
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) HashMap(java.util.HashMap) ProvidedOutput(io.cdap.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput) HashSet(java.util.HashSet)

Aggregations

ProvidedOutput (io.cdap.cdap.internal.app.runtime.batch.dataset.output.ProvidedOutput)3 Output (io.cdap.cdap.api.data.batch.Output)2 DatasetOutputCommitter (io.cdap.cdap.api.data.batch.DatasetOutputCommitter)1 OutputFormatProvider (io.cdap.cdap.api.data.batch.OutputFormatProvider)1 CConfiguration (io.cdap.cdap.common.conf.CConfiguration)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Configuration (org.apache.hadoop.conf.Configuration)1 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)1