Use of io.cdap.cdap.etl.api.engine.sql.SQLEngineOutput in project cdap by caskdata.
The class MapReduceBatchContext, method addOutput.
@Override
public void addOutput(Output output) {
  // Skip SQLEngineOutput as this is not supported in MapReduce.
  if (output instanceof SQLEngineOutput) {
    return;
  }
  Output actualOutput = suffixOutput(getOutput(output));
  Output trackableOutput = CALLER.callUnchecked(() -> {
    Output trackableOutput1 = isPreviewEnabled
      ? actualOutput
      : ExternalDatasets.makeTrackable(mrContext.getAdmin(), actualOutput);
    mrContext.addOutput(trackableOutput1);
    return trackableOutput1;
  });
  outputNames.add(trackableOutput.getAlias());
}
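MapReduce has no SQL engine pushdown, so the instanceof check quietly drops engine outputs and lets every other output register normally. A minimal, self-contained sketch of that guard pattern, using stand-in types rather than the real CDAP classes:

import java.util.ArrayList;
import java.util.List;

// Stand-ins for CDAP's Output and SQLEngineOutput, for illustration only.
class StubOutput {
  private final String alias;
  StubOutput(String alias) { this.alias = alias; }
  String getAlias() { return alias; }
}

class StubSqlEngineOutput extends StubOutput {
  StubSqlEngineOutput(String alias) { super(alias); }
}

public class AddOutputGuardDemo {
  private final List<String> outputNames = new ArrayList<>();

  void addOutput(StubOutput output) {
    // Engine-specific outputs are silently skipped, mirroring the MapReduce context.
    if (output instanceof StubSqlEngineOutput) {
      return;
    }
    outputNames.add(output.getAlias());
  }

  public static void main(String[] args) {
    AddOutputGuardDemo ctx = new AddOutputGuardDemo();
    ctx.addOutput(new StubOutput("fileSink"));
    ctx.addOutput(new StubSqlEngineOutput("enginePushdown")); // skipped
    System.out.println(ctx.outputNames); // prints [fileSink]
  }
}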
Use of io.cdap.cdap.etl.api.engine.sql.SQLEngineOutput in project cdap by caskdata.
The class SparkBatchSinkFactory, method writeCombinedRDD.
/**
 * Writes a combined RDD using multiple OutputFormatProviders.
 * Returns the set of output names that were written, which still require dataset lineage to be recorded.
 */
public <K, V> Set<String> writeCombinedRDD(JavaPairRDD<String, KeyValue<K, V>> combinedRDD,
                                           JavaSparkExecutionContext sec, Set<String> sinkNames) {
  Map<String, OutputFormatProvider> outputFormatProviders = new HashMap<>();
  Set<String> lineageNames = new HashSet<>();
  for (String sinkName : sinkNames) {
    Set<String> sinkOutputNames = sinkOutputs.get(sinkName);
    if (sinkOutputNames == null || sinkOutputNames.isEmpty()) {
      // should never happen if validation happened correctly at pipeline configure time
      throw new IllegalStateException(sinkName + " has no outputs. "
        + "Please check that the sink calls addOutput at some point.");
    }
    for (String outputName : sinkOutputNames) {
      NamedOutputFormatProvider outputFormatProvider = this.outputFormatProviders.get(outputName);
      if (outputFormatProvider == null) {
        // Check if this is a SQL engine output; if so, skip it, since the engine writes it directly.
        SQLEngineOutput sqlEngineOutput = sqlOutputs.get(outputName);
        if (sqlEngineOutput != null) {
          continue;
        }
        // should only happen if a sink without an OutputFormatProvider was grouped with other sinks
        throw new IllegalStateException(String.format(
          "sink '%s' does not use an OutputFormatProvider. This indicates that there is a planner bug. "
            + "Please report the issue and turn off stage consolidation by setting '%s' to false "
            + "in the runtime arguments.", sinkName, Constants.CONSOLIDATE_STAGES));
      }
      lineageNames.add(outputFormatProvider.name);
      outputFormatProviders.put(outputName, outputFormatProvider);
    }
  }
  Configuration hConf = new Configuration();
  Map<String, Set<String>> groupSinkOutputs = new HashMap<>();
  for (String sink : sinkNames) {
    Set<String> outputFormatProvidersForSink = sinkOutputs.get(sink).stream()
      .filter(outputFormatProviders::containsKey)
      .collect(Collectors.toSet());
    if (!outputFormatProvidersForSink.isEmpty()) {
      groupSinkOutputs.put(sink, outputFormatProvidersForSink);
    }
  }
  MultiOutputFormat.addOutputs(hConf, outputFormatProviders, groupSinkOutputs);
  hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, MultiOutputFormat.class.getName());
  RDDUtils.saveHadoopDataset(combinedRDD, hConf);
  return lineageNames;
}
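The second loop narrows each sink's declared outputs to the ones that resolved to an OutputFormatProvider, so a sink whose only output was claimed by the SQL engine drops out of the MultiOutputFormat configuration entirely. The same filtering in isolation, with made-up sink and output names:

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class GroupSinkOutputsDemo {
  public static void main(String[] args) {
    // Each sink and the output names it registered (illustrative values).
    Map<String, Set<String>> sinkOutputs = new HashMap<>();
    sinkOutputs.put("fileSink", new HashSet<>(Arrays.asList("fileOut")));
    sinkOutputs.put("engineSink", new HashSet<>(Arrays.asList("engineOut"))); // SQL engine only

    // Output names that resolved to a real OutputFormatProvider.
    Set<String> formatProviderOutputs = Collections.singleton("fileOut");

    Map<String, Set<String>> groupSinkOutputs = new HashMap<>();
    for (Map.Entry<String, Set<String>> entry : sinkOutputs.entrySet()) {
      Set<String> kept = entry.getValue().stream()
        .filter(formatProviderOutputs::contains)
        .collect(Collectors.toSet());
      if (!kept.isEmpty()) {
        groupSinkOutputs.put(entry.getKey(), kept);
      }
    }
    // Only fileSink survives; engineSink is handled by the SQL engine instead.
    System.out.println(groupSinkOutputs); // prints {fileSink=[fileOut]}
  }
}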
Use of io.cdap.cdap.etl.api.engine.sql.SQLEngineOutput in project cdap by caskdata.
The class SQLEngineCollection, method tryStoreDirect.
@Override
public boolean tryStoreDirect(StageSpec stageSpec) {
  String stageName = stageSpec.getName();
  // Check if this stage should be excluded from executing in the SQL engine
  if (adapter.getExcludedStageNames().contains(stageName)) {
    return false;
  }
  // Get the SQLEngineOutput instance for this stage
  SQLEngineOutput sqlEngineOutput = sinkFactory.getSQLEngineOutput(stageName);
  if (sqlEngineOutput != null) {
    // Exceptions are handled and logged so the standard sink flow takes over in case of failure.
    try {
      SQLEngineJob<Boolean> writeJob = adapter.write(datasetName, sqlEngineOutput);
      adapter.waitForJobAndThrowException(writeJob);
      return writeJob.waitFor();
    } catch (SQLEngineException e) {
      LOG.warn(DIRECT_WRITE_ERROR, stageName, e);
    }
  }
  return false;
}
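The boolean return value is the contract the caller relies on: true means the SQL engine wrote the output directly, while false, including the exception path above, means the standard sink flow must still run. A hedged sketch of that caller-side fallback, with illustrative names rather than the real CDAP types:

public class DirectStoreFallbackDemo {
  // Stand-in for the adapter/sink-factory pair behind tryStoreDirect.
  interface DirectStore {
    boolean tryStoreDirect(String stageName);
  }

  static void store(DirectStore engine, String stageName, Runnable standardSinkFlow) {
    if (engine.tryStoreDirect(stageName)) {
      return; // the SQL engine wrote the output directly
    }
    standardSinkFlow.run(); // excluded stage, missing SQLEngineOutput, or failed write
  }

  public static void main(String[] args) {
    DirectStore failingEngine = stageName -> false;
    store(failingEngine, "someSink", () -> System.out.println("standard sink flow ran"));
  }
}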
Use of io.cdap.cdap.etl.api.engine.sql.SQLEngineOutput in project cdap by caskdata.
The class MockSinkWithWriteCapability, method prepareRun.
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  super.prepareRun(context);
  context.addOutput(new SQLEngineOutput(NAME, NAME, MockSQLEngineWithCapabilities.class.getName(),
                                        Collections.emptyMap()));
}
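The mock registers a SQLEngineOutput during prepareRun, reusing NAME for both the output name and alias and passing the mock engine's class name with an empty argument map, so tests can exercise the engine write path shown above. A stripped-down sketch of the same test-double idea, with stand-in types rather than the CDAP API:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

public class MockSinkDemo {
  // Stand-in mirroring SQLEngineOutput's arguments as used above:
  // (name, alias, SQL engine class name, arguments map).
  static class StubEngineOutput {
    final String name;
    final String alias;
    final String engineClassName;
    final Map<String, String> arguments;

    StubEngineOutput(String name, String alias, String engineClassName, Map<String, String> arguments) {
      this.name = name;
      this.alias = alias;
      this.engineClassName = engineClassName;
      this.arguments = arguments;
    }
  }

  // Stand-in for BatchSinkContext that records what prepareRun registered.
  static class RecordingContext {
    final List<StubEngineOutput> outputs = new ArrayList<>();
    void addOutput(StubEngineOutput output) { outputs.add(output); }
  }

  public static void main(String[] args) {
    RecordingContext context = new RecordingContext();
    // Mirrors the mock's prepareRun: the sink name doubles as the alias.
    context.addOutput(new StubEngineOutput("mockSink", "mockSink", "MockEngine", Collections.emptyMap()));
    System.out.println(context.outputs.size()); // prints 1
  }
}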