
Example 1 with DataPublisher

Use of org.apache.gobblin.publisher.DataPublisher in project incubator-gobblin by apache.

From the class SafeDatasetCommit, the method checkForUnpublishedWUHandling:

void checkForUnpublishedWUHandling(String datasetUrn, JobState.DatasetState datasetState, Class<? extends DataPublisher> dataPublisherClass, Closer closer) throws ReflectiveOperationException, IOException {
    if (UnpublishedHandling.class.isAssignableFrom(dataPublisherClass)) {
        // pass in the job state so the publisher can read its configuration properties
        DataPublisher publisher = closer.register(DataPublisher.getInstance(dataPublisherClass, this.jobContext.getJobState()));
        log.info(String.format("Calling publisher to handle unpublished work units for dataset %s of job %s.", datasetUrn, this.jobContext.getJobId()));
        ((UnpublishedHandling) publisher).handleUnpublishedWorkUnits(datasetState.getTaskStatesAsWorkUnitStates());
    }
}
Also used: UnpublishedHandling (org.apache.gobblin.publisher.UnpublishedHandling), DataPublisher (org.apache.gobblin.publisher.DataPublisher)
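
For context, checkForUnpublishedWUHandling only takes effect when the configured publisher class implements UnpublishedHandling. Below is a minimal, hypothetical sketch of such a publisher: the class name and the cleanup behavior are illustrative, and the overridden methods assume the usual DataPublisher contract (a State-taking constructor plus initialize, publishData, publishMetadata and Closeable.close), which matches how DataPublisher.getInstance is used in the example above.

// Hypothetical publisher, for illustration only: a DataPublisher that also implements
// UnpublishedHandling, which is the condition checkForUnpublishedWUHandling tests for.
import java.io.IOException;
import java.util.Collection;

import org.apache.gobblin.configuration.State;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.publisher.DataPublisher;
import org.apache.gobblin.publisher.UnpublishedHandling;

public class CleanupOnFailurePublisher extends DataPublisher implements UnpublishedHandling {

    public CleanupOnFailurePublisher(State state) {
        // DataPublisher implementations are constructed with the job state,
        // which is what DataPublisher.getInstance(dataPublisherClass, jobState) passes in
        super(state);
    }

    @Override
    public void initialize() throws IOException {
        // no resources to set up in this sketch
    }

    @Override
    public void publishData(Collection<? extends WorkUnitState> states) throws IOException {
        // normal publish path: move task output from staging to its final location
    }

    @Override
    public void publishMetadata(Collection<? extends WorkUnitState> states) throws IOException {
        // no metadata handling in this sketch
    }

    @Override
    public void handleUnpublishedWorkUnits(Collection<? extends WorkUnitState> states) throws IOException {
        // called by checkForUnpublishedWUHandling when the dataset will not be committed;
        // a real implementation might clean up staging output so a retry starts cleanly
        for (WorkUnitState workUnitState : states) {
            // e.g. delete the staging paths recorded in workUnitState
        }
    }

    @Override
    public void close() throws IOException {
        // nothing to release in this sketch
    }
}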

Example 2 with DataPublisher

Use of org.apache.gobblin.publisher.DataPublisher in project incubator-gobblin by apache.

From the class SafeDatasetCommit, the method call:

@Override
public Void call() throws Exception {
    if (this.datasetState.getState() == JobState.RunningState.COMMITTED) {
        log.info(this.datasetUrn + " has been committed.");
        return null;
    }
    metricContext = Instrumented.getMetricContext(datasetState, SafeDatasetCommit.class);
    finalizeDatasetStateBeforeCommit(this.datasetState);
    Class<? extends DataPublisher> dataPublisherClass;
    try (Closer closer = Closer.create()) {
        dataPublisherClass = JobContext.getJobDataPublisherClass(this.jobContext.getJobState()).or((Class<? extends DataPublisher>) Class.forName(ConfigurationKeys.DEFAULT_DATA_PUBLISHER_TYPE));
        if (!canCommitDataset(datasetState)) {
            log.warn(String.format("Not committing dataset %s of job %s with commit policy %s and state %s", this.datasetUrn, this.jobContext.getJobId(), this.jobContext.getJobCommitPolicy(), this.datasetState.getState()));
            checkForUnpublishedWUHandling(this.datasetUrn, this.datasetState, dataPublisherClass, closer);
            throw new RuntimeException(String.format("Not committing dataset %s of job %s with commit policy %s and state %s", this.datasetUrn, this.jobContext.getJobId(), this.jobContext.getJobCommitPolicy(), this.datasetState.getState()));
        }
    } catch (ReflectiveOperationException roe) {
        log.error("Failed to instantiate data publisher for dataset %s of job %s.", this.datasetUrn, this.jobContext.getJobId(), roe);
        throw new RuntimeException(roe);
    } finally {
        maySubmitFailureEvent(datasetState);
    }
    if (this.isJobCancelled) {
        log.info("Executing commit steps although job is cancelled due to job commit policy: " + this.jobContext.getJobCommitPolicy());
    }
    Optional<CommitSequence.Builder> commitSequenceBuilder = Optional.absent();
    boolean canPersistStates = true;
    try (Closer closer = Closer.create()) {
        if (this.shouldCommitDataInJob) {
            log.info(String.format("Committing dataset %s of job %s with commit policy %s and state %s", this.datasetUrn, this.jobContext.getJobId(), this.jobContext.getJobCommitPolicy(), this.datasetState.getState()));
            ListMultimap<TaskFactoryWrapper, TaskState> taskStatesByFactory = groupByTaskFactory(this.datasetState);
            for (Map.Entry<TaskFactoryWrapper, Collection<TaskState>> entry : taskStatesByFactory.asMap().entrySet()) {
                TaskFactory taskFactory = entry.getKey().getTaskFactory();
                if (this.deliverySemantics == DeliverySemantics.EXACTLY_ONCE) {
                    if (taskFactory != null) {
                        throw new RuntimeException("Custom task factories do not support exactly once delivery semantics.");
                    }
                    // capture the builder so the commit sequence can be executed in the finally block below
                    commitSequenceBuilder = generateCommitSequenceBuilder(this.datasetState, entry.getValue());
                } else {
                    DataPublisher publisher;
                    if (taskFactory == null) {
                        publisher = DataPublisherFactory.get(dataPublisherClass.getName(), this.jobContext.getJobState(), this.jobContext.getJobBroker());
                        // publishers that are not cacheable are not shared via the broker, so close them with the closer
                        if (!DataPublisherFactory.isPublisherCacheable(publisher)) {
                            closer.register(publisher);
                        }
                    } else {
                        // NOTE: sharing of publishers is not supported when they are instantiated through the TaskFactory.
                        // This should be revisited if sharing is required.
                        publisher = taskFactory.createDataPublisher(this.datasetState);
                    }
                    if (this.isJobCancelled) {
                        if (publisher.canBeSkipped()) {
                            log.warn(publisher.getClass() + " will be skipped.");
                        } else {
                            canPersistStates = false;
                            throw new RuntimeException("Cannot persist state upon cancellation because publisher has unfinished work and cannot be skipped.");
                        }
                    } else if (this.isMultithreaded && !publisher.isThreadSafe()) {
                        log.warn(String.format("Gobblin is set up to parallelize publishing, however the publisher %s is not thread-safe. " + "Falling back to serial publishing.", publisher.getClass().getName()));
                        safeCommitDataset(entry.getValue(), publisher);
                    } else {
                        commitDataset(entry.getValue(), publisher);
                    }
                }
            }
            this.datasetState.setState(JobState.RunningState.COMMITTED);
        } else {
            if (this.datasetState.getState() == JobState.RunningState.SUCCESSFUL) {
                this.datasetState.setState(JobState.RunningState.COMMITTED);
            }
        }
    } catch (Throwable throwable) {
        log.error(String.format("Failed to commit dataset state for dataset %s of job %s", this.datasetUrn, this.jobContext.getJobId()), throwable);
        throw new RuntimeException(throwable);
    } finally {
        try {
            finalizeDatasetState(datasetState, datasetUrn);
            maySubmitFailureEvent(datasetState);
            maySubmitLineageEvent(datasetState);
            if (commitSequenceBuilder.isPresent()) {
                buildAndExecuteCommitSequence(commitSequenceBuilder.get(), datasetState, datasetUrn);
                datasetState.setState(JobState.RunningState.COMMITTED);
            } else if (canPersistStates) {
                persistDatasetState(datasetUrn, datasetState);
            }
        } catch (IOException | RuntimeException ioe) {
            log.error(String.format("Failed to persist dataset state for dataset %s of job %s", datasetUrn, this.jobContext.getJobId()), ioe);
            throw new RuntimeException(ioe);
        }
    }
    return null;
}
Also used: Closer (com.google.common.io.Closer), FailureEventBuilder (org.apache.gobblin.metrics.event.FailureEventBuilder), LineageEventBuilder (org.apache.gobblin.metrics.event.lineage.LineageEventBuilder), IOException (java.io.IOException), DataPublisher (org.apache.gobblin.publisher.DataPublisher), TaskFactory (org.apache.gobblin.runtime.task.TaskFactory), Collection (java.util.Collection), Map (java.util.Map)
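
The cancellation and multithreading branches in call() consult two hooks on the publisher, canBeSkipped() and isThreadSafe(). The sketch below is hypothetical: the class name and the return values are illustrative, and it assumes these two methods are overridable on DataPublisher, as the calls in the example suggest. It only shows how a publisher would advertise that it may be skipped when the job is cancelled and that it may be invoked from parallel committers.

// Hypothetical publisher used only to illustrate the hooks consulted by SafeDatasetCommit.call().
import java.io.IOException;
import java.util.Collection;

import org.apache.gobblin.configuration.State;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.publisher.DataPublisher;

public class IdempotentParallelPublisher extends DataPublisher {

    public IdempotentParallelPublisher(State state) {
        super(state);
    }

    @Override
    public boolean canBeSkipped() {
        // returning true lets SafeDatasetCommit skip this publisher (with a warning)
        // instead of failing when the job has been cancelled
        return true;
    }

    @Override
    public boolean isThreadSafe() {
        // returning true allows commitDataset() to run in parallel; otherwise
        // SafeDatasetCommit falls back to safeCommitDataset() (serial publishing)
        return true;
    }

    @Override
    public void initialize() throws IOException {
        // no resources to set up in this sketch
    }

    @Override
    public void publishData(Collection<? extends WorkUnitState> states) throws IOException {
        // idempotent publish logic would go here
    }

    @Override
    public void publishMetadata(Collection<? extends WorkUnitState> states) throws IOException {
        // no metadata handling in this sketch
    }

    @Override
    public void close() throws IOException {
        // nothing to release in this sketch
    }
}

A publisher along these lines would be picked up through the job's configured publisher class, the same value that call() resolves into dataPublisherClass at the top of the method.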

Aggregations

DataPublisher (org.apache.gobblin.publisher.DataPublisher): 2
Closer (com.google.common.io.Closer): 1
IOException (java.io.IOException): 1
Collection (java.util.Collection): 1
Map (java.util.Map): 1
FailureEventBuilder (org.apache.gobblin.metrics.event.FailureEventBuilder): 1
LineageEventBuilder (org.apache.gobblin.metrics.event.lineage.LineageEventBuilder): 1
UnpublishedHandling (org.apache.gobblin.publisher.UnpublishedHandling): 1
TaskFactory (org.apache.gobblin.runtime.task.TaskFactory): 1