Use of org.apache.gobblin.util.Either in project incubator-gobblin by Apache.
Class SLAEventKafkaJobMonitorTest, method testFilterByDatasetURN.
/**
 * Checks dataset-URN filtering in {@link SLAEventKafkaJobMonitor}: with an accept pattern of
 * {@code ^/accept.*}, an event for {@code /accept/myDataset} must yield exactly one job spec and no
 * rejected events, while an event for {@code /reject/myDataset} must yield no job spec and bump the
 * rejected-events counter to one.
 */
@Test
public void testFilterByDatasetURN() throws Exception {
  // The URN filter is handed to the constructor directly as a compiled pattern.
  SLAEventKafkaJobMonitor monitor = new SLAEventKafkaJobMonitor("topic", null, new URI("/base/URI"),
      HighLevelConsumerTest.getSimpleConfig(Optional.of(KafkaJobMonitor.KAFKA_JOB_MONITOR_PREFIX)),
      new NoopSchemaVersionWriter(), Optional.of(Pattern.compile("^/accept.*")), Optional.<Pattern>absent(),
      this.templateURI, ImmutableMap.<String, String>of());
  monitor.buildMetricsContextAndMetrics();

  try {
    GobblinTrackingEvent event;
    Collection<Either<JobSpec, URI>> jobSpecs;

    // URN matches the accept pattern: one spec produced, nothing rejected.
    event = createSLAEvent("event", new URI("/accept/myDataset"), Maps.<String, String>newHashMap());
    jobSpecs = monitor.parseJobSpec(event);
    Assert.assertEquals(jobSpecs.size(), 1);
    Assert.assertEquals(monitor.getRejectedEvents().getCount(), 0);

    // URN does not match the accept pattern: no spec produced, one rejection recorded.
    event = createSLAEvent("event", new URI("/reject/myDataset"), Maps.<String, String>newHashMap());
    jobSpecs = monitor.parseJobSpec(event);
    Assert.assertEquals(jobSpecs.size(), 0);
    Assert.assertEquals(monitor.getRejectedEvents().getCount(), 1);
  } finally {
    // Always release the metrics built above, even when an assertion fails.
    monitor.shutdownMetrics();
  }
}
Use of org.apache.gobblin.util.Either in project incubator-gobblin by Apache.
Class GobblinMultiTaskAttempt, method commit.
/**
 * Commits every task in {@link #tasks}.
 *
 * <p>Each task's {@link Task#commit()} is wrapped in a {@link Callable} and handed to an
 * {@link IteratorExecutor} sized by {@code getTaskCommitThreadPoolSize()}. Failed commits are only
 * logged here (at most 10 of them). Whether or not the commits finish normally, the task state store
 * is persisted afterwards and then every configured cleanup {@link CommitStep} is executed.
 *
 * <p>If there are no tasks, a warning is logged and nothing else happens.
 *
 * <p>NOTE(review): when the commit is interrupted the {@link InterruptedException} is wrapped in a
 * {@link RuntimeException} and the thread's interrupt status is not restored - confirm this is intended.
 *
 * @throws IOException presumably raised while persisting task state or running a cleanup step - confirm
 */
public void commit() throws IOException {
  final boolean nothingToCommit = this.tasks == null || this.tasks.isEmpty();
  if (nothingToCommit) {
    log.warn("No tasks to be committed in container " + containerIdOptional.or(""));
    return;
  }

  // Adapter turning each task into a unit of work that commits it.
  final Function<Task, Callable<Void>> toCommitCall = new Function<Task, Callable<Void>>() {
    @Override
    public Callable<Void> apply(final Task taskToCommit) {
      return new Callable<Void>() {
        @Nullable
        @Override
        public Void call() throws Exception {
          taskToCommit.commit();
          return null;
        }
      };
    }
  };
  final Iterator<Callable<Void>> commitCalls = Iterators.transform(this.tasks.iterator(), toCommitCall);

  try {
    IteratorExecutor<Void> committer = new IteratorExecutor<>(
        commitCalls,
        this.getTaskCommitThreadPoolSize(),
        ExecutorsUtils.newDaemonThreadFactory(Optional.of(log), Optional.of("Task-committing-pool-%d")));
    List<Either<Void, ExecutionException>> commitOutcomes = committer.executeAndGetResults();
    IteratorExecutor.logFailures(commitOutcomes, log, 10);
  } catch (InterruptedException interrupted) {
    log.error("Committing of tasks interrupted. Aborting.");
    throw new RuntimeException(interrupted);
  } finally {
    // Runs on success, failure and interruption alike: persist state first, then cleanup steps.
    persistTaskStateStore();
    if (this.cleanupCommitSteps != null) {
      for (CommitStep step : this.cleanupCommitSteps) {
        log.info("Executing additional commit step.");
        step.execute();
      }
    }
  }
}
Use of org.apache.gobblin.util.Either in project incubator-gobblin by Apache.
Class JobContext, method commit.
/**
 * Commits the job's datasets and records the outcome on the job state.
 *
 * <p>The dataset states are recomputed and grouped by URN, the URNs are persisted through the dataset
 * state store, and one commit {@link Callable} per dataset is run on an {@link IteratorExecutor} with
 * {@code numCommitThreads()} threads. Up to 10 failures are logged. If any dataset commit failed, the
 * job state is set to {@code FAILED} and an {@link IOException} is thrown; otherwise the job state is
 * set to {@code COMMITTED}.
 *
 * <p>NOTE(review): an {@link InterruptedException} is rethrown wrapped in an {@link IOException}
 * without restoring the thread's interrupt status, and the job state is left untouched in that case -
 * confirm this is intended.
 *
 * @param isJobCancelled whether the job was cancelled; forwarded to each dataset commit
 * @throws IOException if some dataset failed to commit or the commit was interrupted
 */
void commit(final boolean isJobCancelled) throws IOException {
  this.datasetStatesByUrns = Optional.of(computeDatasetStatesByUrns());

  final boolean commitDataInJob = shouldCommitDataInJob(this.jobState);
  final DeliverySemantics semantics = DeliverySemantics.parse(this.jobState);
  final int commitThreads = numCommitThreads();
  final boolean parallelCommits = commitThreads > 1;

  if (!commitDataInJob) {
    this.logger.info("Job will not commit data since data are committed by tasks.");
  }

  // Builds the commit callable for a single (URN, dataset state) pair.
  final Function<Map.Entry<String, DatasetState>, Callable<Void>> toDatasetCommit =
      new Function<Map.Entry<String, DatasetState>, Callable<Void>>() {
        @Nullable
        @Override
        public Callable<Void> apply(final Map.Entry<String, DatasetState> urnAndState) {
          return createSafeDatasetCommit(commitDataInJob, isJobCancelled, semantics, urnAndState.getKey(),
              urnAndState.getValue(), parallelCommits, JobContext.this);
        }
      };

  try {
    if (this.datasetStatesByUrns.isPresent()) {
      this.logger.info("Persisting dataset urns.");
      this.datasetStateStore.persistDatasetURNs(this.jobName, this.datasetStatesByUrns.get().keySet());
    }

    IteratorExecutor<Void> committer = new IteratorExecutor<>(
        Iterables.transform(this.datasetStatesByUrns.get().entrySet(), toDatasetCommit).iterator(),
        commitThreads,
        ExecutorsUtils.newThreadFactory(Optional.of(this.logger), Optional.of("Commit-thread-%d")));
    List<Either<Void, ExecutionException>> commitOutcomes = committer.executeAndGetResults();

    IteratorExecutor.logFailures(commitOutcomes, LOG, 10);

    boolean everyDatasetCommitted = IteratorExecutor.verifyAllSuccessful(commitOutcomes);
    if (!everyDatasetCommitted) {
      this.jobState.setState(JobState.RunningState.FAILED);
      throw new IOException("Failed to commit dataset state for some dataset(s) of job " + this.jobId);
    }
  } catch (InterruptedException interrupted) {
    throw new IOException(interrupted);
  }

  this.jobState.setState(JobState.RunningState.COMMITTED);
}
Use of org.apache.gobblin.util.Either in project incubator-gobblin by Apache.
Class IteratorExecutor, method logFailures.
/**
 * Log failures in the output of {@link #executeAndGetResults()}.
 *
 * <p>Results are scanned in list order; every {@link Either.Right} entry is treated as a failure and
 * its {@link ExecutionException} is logged at error level. Scanning stops as soon as {@code atMost}
 * failures have been logged.
 *
 * @param results output of {@link #executeAndGetResults()}
 * @param useLogger logger to log the messages into; if {@code null}, this class's own logger is used.
 * @param atMost will log at most this many errors; a value of zero or less logs nothing.
 */
public static <T> void logFailures(List<Either<T, ExecutionException>> results, Logger useLogger, int atMost) {
  // Honour the limit before logging anything: with the check done only after a log call,
  // a non-positive limit would still let one failure through.
  if (atMost <= 0) {
    return;
  }

  Logger actualLogger = useLogger == null ? log : useLogger;
  int printed = 0;
  for (Either<T, ExecutionException> result : results) {
    if (!(result instanceof Either.Right)) {
      continue;
    }
    ExecutionException exc = ((Either.Right<T, ExecutionException>) result).getRight();
    actualLogger.error("Iterator executor failure.", exc);
    printed++;
    if (printed >= atMost) {
      return;
    }
  }
}
Aggregations