use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class RestliServiceBasedLimiterTest method testServerFailover.
/**
 * Walks a {@link RestliServiceBasedLimiter} through a sequence of server shutdowns and restarts
 * and checks, after each step, whether permits can still be acquired and which server port the
 * request sender is currently pointing at.
 */
@Test
public void testServerFailover() throws Exception {
  try (Closer resources = Closer.create()) {
    SharedLimiterKey limiterKey = new SharedLimiterKey("res1");

    // Both servers are started from the same config: the connect string of a TestingServer
    // and a cluster name derived from this test class.
    Map<String, String> haSettings = Maps.newHashMap();
    TestingServer zookeeper = resources.register(new TestingServer(-1));
    haSettings.put(ThrottlingGuiceServletConfig.ZK_STRING_KEY, zookeeper.getConnectString());
    haSettings.put(
        ThrottlingGuiceServletConfig.HA_CLUSTER_NAME,
        RestliServiceBasedLimiterTest.class.getSimpleName() + "_cluster");
    Config haConfig = ConfigFactory.parseMap(haSettings);

    RestliServer serverOn2500 = createAndStartServer(haConfig, 2500);
    RestliServer serverOn2501 = createAndStartServer(haConfig, 2501);

    // The sender is given the URI prefixes of both servers.
    SharedResourcesBroker<SimpleScopeType> topLevelBroker =
        SharedResourcesBrokerFactory.createDefaultTopLevelBroker(
            ConfigFactory.empty(), SimpleScopeType.GLOBAL.defaultScopeInstance());
    RedirectAwareRestClientRequestSender sender =
        new RedirectAwareRestClientRequestSender(
            topLevelBroker,
            Lists.newArrayList(
                serverOn2500.getServer().getURIPrefix(),
                serverOn2501.getServer().getURIPrefix()));
    RestliServiceBasedLimiter throttle =
        RestliServiceBasedLimiter.builder()
            .requestSender(sender)
            .resourceLimited(limiterKey.getResourceLimitedPath())
            .serviceIdentifier("service")
            .build();

    // Step 1: both servers up, permits are granted.
    Assert.assertNotNull(throttle.acquirePermits(20));
    throttle.clearAllStoredPermits();

    // Step 2: 2500 goes away; permits are still granted and the sender targets 2501.
    serverOn2500.close();
    Assert.assertNotNull(throttle.acquirePermits(20));
    Assert.assertEquals(parsePortOfCurrentServerPrefix(sender), 2501);
    throttle.clearAllStoredPermits();

    // Step 3: 2500 comes back; the sender is expected to stay on 2501 (the current leader).
    serverOn2500 = createAndStartServer(haConfig, 2500);
    Assert.assertNotNull(throttle.acquirePermits(20));
    throttle.clearAllStoredPermits();
    Assert.assertEquals(parsePortOfCurrentServerPrefix(sender), 2501);

    // Step 4: force the sender onto 2500 (not the leader) and verify that the next
    // acquisition moves it back to 2501.
    sender.updateRestClient(serverOn2500.getServer().getURIPrefix(), "test");
    Assert.assertEquals(parsePortOfCurrentServerPrefix(sender), 2500);
    Assert.assertNotNull(throttle.acquirePermits(20));
    Assert.assertEquals(parsePortOfCurrentServerPrefix(sender), 2501);

    // Step 5: 2501 goes away; permits are still granted.
    serverOn2501.close();
    Assert.assertNotNull(throttle.acquirePermits(20));
    throttle.clearAllStoredPermits();

    // Step 6: no server left; acquisition yields null.
    serverOn2500.close();
    Assert.assertNull(throttle.acquirePermits(20));
    throttle.clearAllStoredPermits();
  }
}
use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class TestFailover method test.
/**
 * Starts two throttling servers in the same HA cluster and checks that the first one serves
 * permits, that the second one answers with a redirect pointing at the first one's port, and
 * that the second one serves permits once the first has been closed.
 */
@Test
public void test() throws Exception {
  try (Closer resources = Closer.create()) {
    Map<String, String> haSettings = Maps.newHashMap();
    TestingServer zookeeper = resources.register(new TestingServer(-1));
    haSettings.put(ThrottlingGuiceServletConfig.ZK_STRING_KEY, zookeeper.getConnectString());
    haSettings.put(
        ThrottlingGuiceServletConfig.HA_CLUSTER_NAME,
        TestFailover.class.getSimpleName() + "_cluster");
    Config haConfig = ConfigFactory.parseMap(haSettings);

    // With only the first server running, it hands out permits.
    ThrottlingGuiceServletConfig firstServer = createServerAtPort(haConfig, 2001);
    PermitAllocation grantBeforeSecondServer = sendRequestToServer(firstServer, 10);
    Assert.assertTrue(grantBeforeSecondServer.getPermits() >= 1);

    // Starting a second server does not stop the first one from handing out permits.
    ThrottlingGuiceServletConfig secondServer = createServerAtPort(haConfig, 2002);
    PermitAllocation grantAfterSecondServer = sendRequestToServer(firstServer, 10);
    Assert.assertTrue(grantAfterSecondServer.getPermits() >= 1);

    // A request to the second server must fail with a redirect to port 2001.
    try {
      sendRequestToServer(secondServer, 10);
      Assert.fail();
    } catch (RestLiServiceException redirect) {
      Assert.assertTrue(redirect.hasErrorDetails());
      Assert.assertTrue(redirect.getErrorDetails().containsKey(LimiterServerResource.LOCATION_301));
      String redirectLocation =
          redirect.getErrorDetails().get(LimiterServerResource.LOCATION_301).toString();
      int redirectPort = new URI(redirectLocation).getPort();
      Assert.assertEquals(redirectPort, 2001);
    }

    // Once the first server is closed, the second one hands out permits itself.
    firstServer.close();
    PermitAllocation grantAfterFailover = sendRequestToServer(secondServer, 10);
    Assert.assertTrue(grantAfterFailover.getPermits() >= 1);
  }
}
use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class AbstractJobLauncher method cleanupStagingDataPerTask.
/**
 * Cleans the staging data of every {@link TaskState} held by the given job state, one task at
 * a time.
 *
 * <p>An {@link IOException} raised while cleaning a single task is logged and does not stop
 * the remaining tasks from being processed. The shared {@link Closer} is always closed at the
 * end; an {@link IOException} raised while closing it is logged as well.
 *
 * @param jobState job state whose task states are to be cleaned
 */
private static void cleanupStagingDataPerTask(JobState jobState) {
  final Closer sharedResources = Closer.create();
  final Map<String, ParallelRunner> runners = Maps.newHashMap();
  try {
    for (TaskState task : jobState.getTaskStates()) {
      try {
        JobLauncherUtils.cleanTaskStagingData(task, LOG, sharedResources, runners);
      } catch (IOException cleanFailure) {
        final String message =
            String.format("Failed to clean staging data for task %s: %s", task.getTaskId(), cleanFailure);
        LOG.error(message, cleanFailure);
      }
    }
  } finally {
    // Closed by hand (not try-with-resources) so that a close failure is logged here
    // regardless of how the loop above finished.
    try {
      sharedResources.close();
    } catch (IOException closeFailure) {
      LOG.error("Failed to clean staging data", closeFailure);
    }
  }
}
use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class AbstractJobLauncher method cleanLeftoverStagingData.
/**
 * Removes staging data that a previous, possibly failed, run of the job may have left behind.
 *
 * <p>Nothing is cleaned when {@link ConfigurationKeys#CLEANUP_STAGING_DATA_BY_INITIALIZER} is
 * set to true (the initializer is expected to do it), or when the job still has unfinished
 * {@link CommitSequence}s.
 *
 * <p>Otherwise the job context decides the granularity (see
 * {@link ConfigurationKeys#CLEANUP_STAGING_DATA_PER_TASK}): either staging data is cleaned per
 * work unit, which requires a work unit stream that is safe to materialize, or the whole job's
 * staging data is cleaned at once. Any failure during the actual clean-up is logged and
 * swallowed so that it cannot affect the current run.
 *
 * @param workUnits work units of the current run
 * @param jobState state of the current job
 * @throws JobException if checking for unfinished commit sequences fails
 */
private void cleanLeftoverStagingData(WorkUnitStream workUnits, JobState jobState) throws JobException {
  // Clean up will be done by initializer.
  if (jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_STAGING_DATA_BY_INITIALIZER, false)) {
    return;
  }

  final boolean cleanable;
  try {
    cleanable = canCleanStagingData(jobState);
  } catch (IOException e) {
    throw new JobException("Failed to check unfinished commit sequences", e);
  }
  if (!cleanable) {
    LOG.error("Job " + jobState.getJobName() + " has unfinished commit sequences. Will not clean up staging data.");
    return;
  }

  try {
    if (!this.jobContext.shouldCleanupStagingDataPerTask()) {
      // Job-level clean-up, optionally preceded by removal of old jobs' data.
      boolean cleanOldJobs = jobState.getPropAsBoolean(
          ConfigurationKeys.CLEANUP_OLD_JOBS_DATA, ConfigurationKeys.DEFAULT_CLEANUP_OLD_JOBS_DATA);
      if (cleanOldJobs) {
        JobLauncherUtils.cleanUpOldJobData(
            jobState, LOG, jobContext.getStagingDirProvided(), jobContext.getOutputDirProvided());
      }
      JobLauncherUtils.cleanJobStagingData(jobState, LOG);
    } else if (!workUnits.isSafeToMaterialize()) {
      throw new RuntimeException("Work unit streams do not support cleaning staging data per task.");
    } else {
      // Per-task clean-up over the materialized, flattened work units.
      Closer sharedResources = Closer.create();
      Map<String, ParallelRunner> runners = Maps.newHashMap();
      try {
        for (WorkUnit unit : JobLauncherUtils.flattenWorkUnits(workUnits.getMaterializedWorkUnitCollection())) {
          WorkUnitState unitState = new WorkUnitState(unit, jobState);
          JobLauncherUtils.cleanTaskStagingData(unitState, LOG, sharedResources, runners);
        }
      } catch (Throwable failure) {
        throw sharedResources.rethrow(failure);
      } finally {
        sharedResources.close();
      }
    }
  } catch (Throwable t) {
    // Catch Throwable instead of just IOException to make sure failure of this won't affect the current run
    LOG.error("Failed to clean leftover staging data", t);
  }
}
use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class AbstractJobLauncher method launchJob.
/**
 * Launches the job held by this launcher's job context and runs it to completion.
 *
 * <p>Visible sequence: notify listeners of job preparation; for exactly-once semantics, execute
 * unfinished commit sequences; obtain the work unit stream from the source; initialize writer
 * and converter initializers; clean left-over staging data; notify listeners of job start;
 * prepare, filter and register the work units; run them; commit. Afterwards staging data is
 * cleaned up, job execution info is stored, and listeners are notified of completion (and of
 * failure, if the final state is FAILED).
 *
 * <p>Any {@link Throwable} raised while preparing or running the job is logged and recorded by
 * setting the job state to FAILED; it is not rethrown directly.
 *
 * @param jobListener listener passed to every {@code notifyListeners} call made here
 * @throws JobException if the job state is FAILED once the job has finished
 */
@Override
public void launchJob(JobListener jobListener) throws JobException {
String jobId = this.jobContext.getJobId();
final JobState jobState = this.jobContext.getJobState();
try {
// Expose job name and key through the logging MDC; both are removed in the outer finally.
MDC.put(ConfigurationKeys.JOB_NAME_KEY, this.jobContext.getJobName());
MDC.put(ConfigurationKeys.JOB_KEY_KEY, this.jobContext.getJobKey());
TimingEvent launchJobTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.FULL_JOB_EXECUTION);
try (Closer closer = Closer.create()) {
// The job context and the initializers registered below are closed when this block exits.
closer.register(this.jobContext);
notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_PREPARE, new JobListenerAction() {
@Override
public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
jobListener.onJobPrepare(jobContext);
}
});
if (this.jobContext.getSemantics() == DeliverySemantics.EXACTLY_ONCE) {
// If exactly-once is used, commit sequences of the previous run must be successfully completed
// before this run can make progress.
executeUnfinishedCommitSequences(jobState.getJobName());
}
TimingEvent workUnitsCreationTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.WORK_UNITS_CREATION);
Source<?, ?> source = this.jobContext.getSource();
WorkUnitStream workUnitStream;
// Sources that can produce a stream are asked for one; otherwise the work unit collection is wrapped.
if (source instanceof WorkUnitStreamSource) {
workUnitStream = ((WorkUnitStreamSource) source).getWorkunitStream(jobState);
} else {
workUnitStream = new BasicWorkUnitStream.Builder(source.getWorkunits(jobState)).build();
}
workUnitsCreationTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.WORK_UNITS_CREATION));
// The absence means there is something wrong getting the work units
if (workUnitStream == null || workUnitStream.getWorkUnits() == null) {
this.eventSubmitter.submit(JobEvent.WORK_UNITS_MISSING);
jobState.setState(JobState.RunningState.FAILED);
// This exception is caught by the catch (Throwable) below; the FAILED state set above is what
// eventually makes this method throw.
throw new JobException("Failed to get work units for job " + jobId);
}
// No work unit to run
if (!workUnitStream.getWorkUnits().hasNext()) {
this.eventSubmitter.submit(JobEvent.WORK_UNITS_EMPTY);
LOG.warn("No work units have been created for job " + jobId);
jobState.setState(JobState.RunningState.COMMITTED);
notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_COMPLETE, new JobListenerAction() {
@Override
public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
jobListener.onJobCompletion(jobContext);
}
});
// Early exit: the finally blocks below still run, but the post-run notifications are skipped.
return;
}
// Initialize writer and converter(s)
closer.register(WriterInitializerFactory.newInstace(jobState, workUnitStream)).initialize();
closer.register(ConverterInitializerFactory.newInstance(jobState, workUnitStream)).initialize();
TimingEvent stagingDataCleanTimer = this.eventSubmitter.getTimingEvent(TimingEvent.RunJobTimings.MR_STAGING_DATA_CLEAN);
// Cleanup left-over staging data possibly from the previous run. This is particularly
// important if the current batch of WorkUnits include failed WorkUnits from the previous
// run which may still have left-over staging data not cleaned up yet.
cleanLeftoverStagingData(workUnitStream, jobState);
stagingDataCleanTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.MR_STAGING_DATA_CLEAN));
long startTime = System.currentTimeMillis();
jobState.setStartTime(startTime);
jobState.setState(JobState.RunningState.RUNNING);
try {
LOG.info("Starting job " + jobId);
notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_START, new JobListenerAction() {
@Override
public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
jobListener.onJobStart(jobContext);
}
});
TimingEvent workUnitsPreparationTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.WORK_UNITS_PREPARATION);
// Add task ids
workUnitStream = prepareWorkUnits(workUnitStream, jobState);
// Remove skipped workUnits from the list of work units to execute.
workUnitStream = workUnitStream.filter(new SkippedWorkUnitsFilter(jobState));
// Add surviving tasks to jobState
workUnitStream = workUnitStream.transform(new MultiWorkUnitForEach() {
@Override
public void forWorkUnit(WorkUnit workUnit) {
jobState.incrementTaskCount();
jobState.addTaskState(new TaskState(new WorkUnitState(workUnit, jobState)));
}
});
workUnitsPreparationTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.WORK_UNITS_PREPARATION));
// Write job execution info to the job history store before the job starts to run
this.jobContext.storeJobExecutionInfo();
TimingEvent jobRunTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.JOB_RUN);
// Start the job and wait for it to finish
runWorkUnitStream(workUnitStream);
jobRunTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.JOB_RUN));
// Submit an event named after the resulting state, converted from UPPER_UNDERSCORE to UpperCamel.
this.eventSubmitter.submit(CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, "JOB_" + jobState.getState()));
// Check and set final job jobPropsState upon job completion
if (jobState.getState() == JobState.RunningState.CANCELLED) {
LOG.info(String.format("Job %s has been cancelled, aborting now", jobId));
// Early exit without committing; the finally blocks below still run.
return;
}
TimingEvent jobCommitTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.JOB_COMMIT);
this.jobContext.finalizeJobStateBeforeCommit();
this.jobContext.commit();
postProcessJobState(jobState);
jobCommitTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.JOB_COMMIT));
} finally {
// End time and duration are recorded whether the run succeeded, failed or was cancelled.
long endTime = System.currentTimeMillis();
jobState.setEndTime(endTime);
jobState.setDuration(endTime - jobState.getStartTime());
}
} catch (Throwable t) {
// The failure is recorded in the job state and logged instead of being rethrown here; the
// FAILED state is turned into a JobException further down, after listeners have been notified.
jobState.setState(JobState.RunningState.FAILED);
String errMsg = "Failed to launch and run job " + jobId;
LOG.error(errMsg + ": " + t, t);
} finally {
try {
TimingEvent jobCleanupTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.JOB_CLEANUP);
cleanupStagingData(jobState);
jobCleanupTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.JOB_CLEANUP));
// Write job execution info to the job history store upon job termination
this.jobContext.storeJobExecutionInfo();
} finally {
// The full-execution timer is stopped even if the cleanup above throws.
launchJobTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.FULL_JOB_EXECUTION));
}
}
for (JobState.DatasetState datasetState : this.jobContext.getDatasetStatesByUrns().values()) {
// Set the overall job state to FAILED if the job failed to process any dataset
if (datasetState.getState() == JobState.RunningState.FAILED) {
jobState.setState(JobState.RunningState.FAILED);
LOG.warn("At least one dataset state is FAILED. Setting job state to FAILED.");
break;
}
}
// Completion is reported to listeners regardless of the final state.
notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_COMPLETE, new JobListenerAction() {
@Override
public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
jobListener.onJobCompletion(jobContext);
}
});
// A FAILED state is additionally reported to listeners and then surfaced to the caller.
if (jobState.getState() == JobState.RunningState.FAILED) {
notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_FAILED, new JobListenerAction() {
@Override
public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
jobListener.onJobFailure(jobContext);
}
});
throw new JobException(String.format("Job %s failed", jobId));
}
} finally {
// Stop metrics reporting
if (this.jobContext.getJobMetricsOptional().isPresent()) {
JobMetrics.remove(jobState);
}
MDC.remove(ConfigurationKeys.JOB_NAME_KEY);
MDC.remove(ConfigurationKeys.JOB_KEY_KEY);
}
}
Aggregations