Use of io.cdap.cdap.etl.api.relational.RelationalTransform in project cdap by caskdata.
The method tryRelationalTransform of the class BatchSQLEngineAdapter.
/**
 * Attempts to perform a relational transform with the SQL engine.
 *
 * <p>The plugin is handed a context holding one engine relation per input and is asked to express its logic
 * against it. An empty optional is returned when the plugin declines, when the output relation it sets is not
 * valid, when the engine does not support one of the stage's input schemas or its output schema, or when the
 * engine reports that it cannot perform the resulting transform definition. Otherwise the execution is handed to
 * {@code runJob} as an {@code EXECUTE} job whose body pushes every input for which {@code exists} is false,
 * counts input records, runs the transform on the engine and counts output records.
 *
 * @param stageSpec stage specification
 * @param transform transform plugin
 * @param input input collections, keyed by the same names as the stage's input schemas
 * @return job for the resulting dataset, or an empty optional if the transform can't be done with this engine
 * @throws IllegalStateException if the plugin reports success but did not set an output relation
 */
public Optional<SQLEngineJob<SQLDataset>> tryRelationalTransform(StageSpec stageSpec, RelationalTransform transform, Map<String, SparkCollection<Object>> input) {
  final String stageName = stageSpec.getName();

  // Build one engine relation per input, using the schema the stage declares for that input.
  Map<String, Relation> relationsByInput = input.keySet().stream().collect(Collectors.toMap(
    Function.identity(),
    name -> sqlEngine.getRelation(new SQLRelationDefinition(name, stageSpec.getInputSchemas().get(name)))));
  BasicRelationalTransformContext transformContext = new BasicRelationalTransformContext(
    getSQLRelationalEngine(), relationsByInput, stageSpec.getInputSchemas(), stageSpec.getOutputSchema());

  boolean handledByPlugin = transform.transform(transformContext);
  if (!handledByPlugin) {
    // The plugin could not express its transform with this engine.
    return Optional.empty();
  }

  Relation outputRelation = transformContext.getOutputRelation();
  if (outputRelation == null) {
    // The plugin reported success but never set an output relation.
    throw new IllegalStateException("Plugin " + transform + " did not produce a relational output");
  }
  if (!outputRelation.isValid()) {
    // The output is an invalid relation, probably because some operation is not supported by the engine.
    return Optional.empty();
  }

  // The engine must support every input schema and the output schema of this stage.
  boolean inputSchemasSupported = stageSpec.getInputSchemas().values().stream()
    .allMatch(schema -> sqlEngine.supportsInputSchema(schema));
  if (!inputSchemasSupported || !sqlEngine.supportsOutputSchema(stageSpec.getOutputSchema())) {
    return Optional.empty();
  }

  // Let the engine validate the transform definition before committing to a job.
  SQLTransformDefinition definition = new SQLTransformDefinition(
    stageName, outputRelation, stageSpec.getOutputSchema(), Collections.emptyMap(), Collections.emptyMap());
  if (!sqlEngine.canTransform(definition)) {
    return Optional.empty();
  }

  return Optional.of(runJob(stageName, SQLEngineJobType.EXECUTE, () -> {
    // Push every input that does not exist yet and is needed to execute this transform.
    for (Map.Entry<String, SparkCollection<Object>> entry : input.entrySet()) {
      String inputName = entry.getKey();
      if (exists(inputName)) {
        continue;
      }
      push(inputName, stageSpec.getInputSchemas().get(inputName), entry.getValue());
    }

    // Metrics and statistics collectors for this stage.
    DefaultStageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
    StageStatisticsCollector collector = statsCollectors.get(stageName);

    // Look up the dataset for each input and count its records.
    Map<String, SQLDataset> datasetsByInput = input.keySet().stream()
      .collect(Collectors.toMap(Function.identity(), this::getDatasetForStage));
    for (SQLDataset dataset : datasetsByInput.values()) {
      countRecordsIn(dataset, collector, stageMetrics);
    }

    // Run the transform on the engine, then count the records it produced.
    SQLTransformRequest request = new SQLTransformRequest(
      datasetsByInput, stageName, transformContext.getOutputRelation(), stageSpec.getOutputSchema());
    SQLDataset result = sqlEngine.transform(request);
    countRecordsOut(result, collector, stageMetrics);
    return result;
  }));
}
Aggregations