Use of org.apache.spark.scheduler.SparkListenerEvent in project OpenLineage by OpenLineage.
The class SaveIntoDataSourceCommandVisitor, method apply.
@Override
public List<OpenLineage.OutputDataset> apply(SparkListenerEvent event) {
  BaseRelation relation;
  SaveIntoDataSourceCommand command =
      (SaveIntoDataSourceCommand) context.getQueryExecution().get().optimizedPlan();
  // Handle Kafka sources explicitly, as other impls of CreatableRelationProvider
  // may not be handled in the generic way below.
  if (KafkaRelationVisitor.isKafkaSource(command.dataSource())) {
    return KafkaRelationVisitor.createKafkaDatasets(
        outputDataset(),
        command.dataSource(),
        command.options(),
        command.mode(),
        command.schema());
  }
  if (command.dataSource().getClass().getName().contains("DeltaDataSource")) {
    if (command.options().contains("path")) {
      URI uri = URI.create(command.options().get("path").get());
      return Collections.singletonList(
          outputDataset().getDataset(PathUtils.fromURI(uri, "file"), command.schema()));
    }
  }
  SQLContext sqlContext = context.getSparkSession().get().sqlContext();
  try {
    if (command.dataSource() instanceof RelationProvider) {
      RelationProvider p = (RelationProvider) command.dataSource();
      relation = p.createRelation(sqlContext, command.options());
    } else {
      SchemaRelationProvider p = (SchemaRelationProvider) command.dataSource();
      relation = p.createRelation(sqlContext, command.options(), command.schema());
    }
  } catch (Exception ex) {
    // Error types from the Scala side are not detected reliably, so check explicitly.
    if (ex instanceof SQLException) {
      // This can happen on SparkListenerSQLExecutionStart, e.g. for SQLite, when the database
      // does not exist yet - it will be created during command execution.
      // It is safe to ignore on start, because it will work on end;
      // see SparkReadWriteIntegTest.testReadFromFileWriteToJdbc
      log.warn("Can't create relation: ", ex);
      return Collections.emptyList();
    }
    throw ex;
  }
  LogicalRelation logicalRelation =
      new LogicalRelation(
          relation, relation.schema().toAttributes(), Option.empty(), command.isStreaming());
  return delegate(
          context.getOutputDatasetQueryPlanVisitors(), context.getOutputDatasetBuilders(), event)
      .applyOrElse(
          logicalRelation,
          ScalaConversionUtils.toScalaFn((lp) -> Collections.<OutputDataset>emptyList()))
      .stream()
      .map(
          ds -> {
            // Preserve any facets already attached to the dataset.
            Builder<String, OpenLineage.DatasetFacet> facetsMap =
                ImmutableMap.<String, OpenLineage.DatasetFacet>builder();
            if (ds.getFacets().getAdditionalProperties() != null) {
              facetsMap.putAll(ds.getFacets().getAdditionalProperties());
            }
            ds.getFacets().getAdditionalProperties().putAll(facetsMap.build());
            if (SaveMode.Overwrite == command.mode()) {
              // Rebuild the whole dataset with a LifecycleStateChange facet added.
              OpenLineage.DatasetFacets facets =
                  context
                      .getOpenLineage()
                      .newDatasetFacets(
                          ds.getFacets().getDocumentation(),
                          ds.getFacets().getDataSource(),
                          ds.getFacets().getVersion(),
                          ds.getFacets().getSchema(),
                          context
                              .getOpenLineage()
                              .newLifecycleStateChangeDatasetFacet(
                                  OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange
                                      .OVERWRITE,
                                  null));
              OpenLineage.OutputDataset newDs =
                  context
                      .getOpenLineage()
                      .newOutputDataset(
                          ds.getNamespace(), ds.getName(), facets, ds.getOutputFacets());
              return newDs;
            }
            return ds;
          })
      .collect(Collectors.toList());
}
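The try block above dispatches on whether the provider can infer its own schema. A minimal sketch isolating that dispatch (the RelationFactory helper below is hypothetical, not part of the OpenLineage sources): a RelationProvider builds its relation from the options alone, while a SchemaRelationProvider needs the user-specified schema passed explicitly.

import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.sources.BaseRelation;
import org.apache.spark.sql.sources.CreatableRelationProvider;
import org.apache.spark.sql.sources.RelationProvider;
import org.apache.spark.sql.sources.SchemaRelationProvider;
import org.apache.spark.sql.types.StructType;

final class RelationFactory {

  static BaseRelation createRelation(
      SQLContext sqlContext,
      CreatableRelationProvider dataSource,
      scala.collection.immutable.Map<String, String> options,
      StructType schema) {
    if (dataSource instanceof RelationProvider) {
      // Schema-inferring providers take only the options.
      return ((RelationProvider) dataSource).createRelation(sqlContext, options);
    }
    // Otherwise fall back to providers that require an explicit schema.
    return ((SchemaRelationProvider) dataSource).createRelation(sqlContext, options, schema);
  }
}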
Use of org.apache.spark.scheduler.SparkListenerEvent in project OpenLineage by OpenLineage.
The class AppendDataDatasetBuilder, method apply.
@Override
public List<OpenLineage.OutputDataset> apply(SparkListenerEvent event) {
  // Needs the cast to LogicalPlan despite IntelliJ claiming otherwise.
  AppendData appendData = (AppendData) context.getQueryExecution().get().optimizedPlan();
  LogicalPlan logicalPlan = (LogicalPlan) appendData.table();
  return delegate(
          context.getOutputDatasetQueryPlanVisitors(), context.getOutputDatasetBuilders(), event)
      .applyOrElse(
          logicalPlan,
          ScalaConversionUtils.toScalaFn((lp) -> Collections.<OpenLineage.OutputDataset>emptyList()))
      .stream()
      .collect(Collectors.toList());
}
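Both apply methods above finish with the same delegate(...).applyOrElse(...) call: hand the extracted LogicalPlan node to whichever registered visitor is defined for it, and fall back to an empty list when none matches. A minimal sketch of that dispatch pattern in plain Java (the PlanVisitor interface and applyFirstMatching helper are hypothetical, shown only to illustrate the applyOrElse-with-empty-default behaviour):

import java.util.Collections;
import java.util.List;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;

final class VisitorDispatch {

  // Partial-function style visitor: defined for some plan nodes, not for others.
  interface PlanVisitor<T> {
    boolean isDefinedAt(LogicalPlan plan);

    List<T> apply(LogicalPlan plan);
  }

  // Returns the datasets from the first visitor defined for the plan, or an empty
  // list - the same default the builders get from applyOrElse above.
  static <T> List<T> applyFirstMatching(Iterable<PlanVisitor<T>> visitors, LogicalPlan plan) {
    for (PlanVisitor<T> visitor : visitors) {
      if (visitor.isDefinedAt(plan)) {
        return visitor.apply(plan);
      }
    }
    return Collections.emptyList();
  }
}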
Use of org.apache.spark.scheduler.SparkListenerEvent in project kylo by Teradata.
The class SparkDataSetProviderV1, method onJobEnd.
@Override
protected void onJobEnd(
    @Nonnull final Function1<SparkListenerJobEnd, Unit> function,
    @Nonnull final KyloCatalogClient<DataFrame> client) {
  final SparkListener listener =
      new JavaSparkListener() {
        @Override
        public void onJobEnd(@Nonnull final SparkListenerJobEnd jobEnd) {
          function.apply(jobEnd);
        }

        // method required for CDH 5.8+
        @SuppressWarnings("unused")
        public void onOtherEvent(@Nonnull final SparkListenerEvent event) {
          // ignored
        }
      };
  ((KyloCatalogClientV1) client).getSQLContext().sparkContext().addSparkListener(listener);
}
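For comparison, a minimal sketch (assuming Spark 2.x or later, not part of the kylo sources) of the same idea without the JavaSparkListener shim: there SparkListener is an abstract class with no-op defaults, so onOtherEvent is an ordinary override rather than a method added for CDH 5.8+.

import java.util.function.Consumer;
import org.apache.spark.SparkContext;
import org.apache.spark.scheduler.SparkListener;
import org.apache.spark.scheduler.SparkListenerEvent;
import org.apache.spark.scheduler.SparkListenerJobEnd;

public class JobEndForwardingListener extends SparkListener {

  private final Consumer<SparkListenerJobEnd> callback;

  public JobEndForwardingListener(final Consumer<SparkListenerJobEnd> callback) {
    this.callback = callback;
  }

  @Override
  public void onJobEnd(final SparkListenerJobEnd jobEnd) {
    // Forward the job-end event to the supplied callback.
    callback.accept(jobEnd);
  }

  @Override
  public void onOtherEvent(final SparkListenerEvent event) {
    // SQL/streaming events arrive here; ignored, as in the kylo listener above.
  }

  public static void register(final SparkContext sc, final Consumer<SparkListenerJobEnd> callback) {
    sc.addSparkListener(new JobEndForwardingListener(callback));
  }
}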