use of com.microsoft.azure.hdinsight.spark.common.log.SparkLogLine in project azure-tools-for-java by Microsoft.
the class Session method createSessionRequest.
/**
 * Builds the HTTP request that creates a Livy interactive session.
 *
 * The session creation parameters are built and converted to JSON, a debug line describing the
 * request is pushed to the control subject, and the JSON is then posted to the session REST segment
 * resolved against the base URL.
 *
 * @return the observable returned by the HTTP client's POST call, typed as the Livy session response
 * @throws IllegalArgumentException if the creation parameters can't be converted to JSON
 */
private Observable<com.microsoft.azure.hdinsight.sdk.rest.livy.interactive.Session> createSessionRequest() {
    final URI sessionsUri = baseUrl.resolve(REST_SEGMENT_SESSION);

    final String requestBody = getCreateParameters()
            .build()
            .convertToJson()
            .orElseThrow(() -> new IllegalArgumentException("Bad session arguments to post."));

    // Let the control channel know what is about to be sent
    getCtrlSubject().onNext(new SparkLogLine(
            TOOL,
            Debug,
            "Create Livy Session by sending request to " + sessionsUri + " with body " + requestBody));

    final StringEntity payload = new StringEntity(requestBody, StandardCharsets.UTF_8);
    payload.setContentType("application/json");

    return getHttp()
            .setUserAgent(getUserAgent())
            .post(sessionsUri.toString(),
                  payload,
                  null,
                  null,
                  com.microsoft.azure.hdinsight.sdk.rest.livy.interactive.Session.class);
}
use of com.microsoft.azure.hdinsight.spark.common.log.SparkLogLine in project azure-tools-for-java by Microsoft.
the class ADLSGen2Deploy method deploy.
/**
 * Uploads the given artifact file into the upload directory through the ADLS Gen2 file system operations.
 *
 * The chain creates the destination directory, creates the destination file, and uploads the file data.
 * When directory creation fails with a message containing the forbidden or not-found HTTP status code,
 * the failure is replaced by an {@link IllegalArgumentException} carrying extra hints; any other
 * failure is propagated as is.
 *
 * @param src        the local artifact file to upload
 * @param logSubject the log observer (not used by this implementation)
 * @return an observable emitting the URI string obtained by parsing the uploaded file path as an ABFS URI
 */
@Override
public Observable<String> deploy(File src, Observer<SparkLogLine> logSubject) {
    // Uploading via the ADLS Gen2 REST API takes four steps:
    //   1. PUT request to create the new directory
    //   2. PUT request to create the new (empty) artifact file
    //   3. PATCH request to append data to the file
    //   4. PATCH request to flush data to the file
    // NOTE(review): steps 3 and 4 are presumably both performed inside uploadData() - confirm.
    final String uploadDir = getUploadDir().toString();

    // A trailing '/' has to be dropped, otherwise the service answers with an invalid URL response
    final String dirPath;
    if (uploadDir.endsWith("/")) {
        dirPath = uploadDir.substring(0, uploadDir.length() - 1);
    } else {
        dirPath = uploadDir;
    }

    final String filePath = String.format("%s/%s", dirPath, src.getName());
    final ADLSGen2FSOperation fsOperation = new ADLSGen2FSOperation(this.http);

    return fsOperation.createDir(dirPath, "0755")
            .onErrorReturn(failure -> {
                final String reason = failure.getMessage();
                final boolean forbiddenOrNotFound = reason != null
                        && (reason.contains(String.valueOf(HttpStatus.SC_FORBIDDEN))
                            || reason.contains(String.valueOf(HttpStatus.SC_NOT_FOUND)));

                if (!forbiddenOrNotFound) {
                    throw Exceptions.propagate(failure);
                }

                // Sample destinationRootPath: https://accountName.dfs.core.windows.net/fsName/SparkSubmission/
                final String fileSystemRootPath = UriUtil.normalizeWithSlashEnding(URI.create(destinationRootPath))
                        .resolve("../")
                        .toString();

                throw new IllegalArgumentException(String.format(
                        "Failed to create folder %s when uploading Spark application artifacts with error: %s. %s",
                        dirPath,
                        reason,
                        getForbiddenErrorHints(fileSystemRootPath)));
            })
            .doOnNext(created -> log().info(String.format("Create filesystem %s successfully.", dirPath)))
            .flatMap(created -> fsOperation.createFile(filePath, "0755"))
            .flatMap(fileCreated -> fsOperation.uploadData(filePath, src))
            .doOnNext(uploaded -> log().info(String.format("Append data to file %s successfully.", filePath)))
            .map(uploaded -> AbfsUri.parse(filePath).getUri().toString());
}
use of com.microsoft.azure.hdinsight.spark.common.log.SparkLogLine in project azure-tools-for-java by Microsoft.
the class SparkBatchJobDebuggerRunner method execute.
/**
 * Execute Spark remote debugging action, refer to {@link GenericDebuggerRunner#execute(ExecutionEnvironment)}
 * implementations, some internal API leveraged.
 *
 * <p>Outline of what this method does:
 * <ol>
 *   <li>checks that an artifact is selected, then opens the SSH debug session;</li>
 *   <li>creates the driver debug process and handler, and shows an independent submission console that
 *       prints the control messages of the Spark job;</li>
 *   <li>subscribes to the debug events: a "handler ready" event prepares the debug console and either
 *       resolves the driver promise or starts executor debugging, an "executor created" event starts a
 *       new executor debug process;</li>
 *   <li>starts the driver debug process and hands the run profile to the execution manager, which waits
 *       for the driver promise before attaching the debugger.</li>
 * </ol>
 *
 * @param environment the execution environment
 * @throws ExecutionException the exception in execution, raised here when no artifact is selected or
 *                            when the SSH debug session can't be created
 */
@Override
public void execute(final ExecutionEnvironment environment) throws ExecutionException {
    final RunProfileState state = environment.getState();
    if (state == null) {
        return;
    }
    final Operation operation = environment.getUserData(TelemetryKeys.OPERATION);
    final AsyncPromise<ExecutionEnvironment> jobDriverEnvReady = new AsyncPromise<>();
    final SparkBatchRemoteDebugState submissionState = (SparkBatchRemoteDebugState) state;
    final SparkSubmitModel submitModel = submissionState.getSubmitModel();
    // Check the artifact BEFORE opening the SSH session: failing afterwards would leave the opened
    // session behind, since it is only closed when the debug event stream terminates.
    final String artifactPath = submitModel.getArtifactPath().orElseThrow(() -> new ExecutionException("No artifact selected"));
    // Create SSH debug session firstly
    final SparkBatchDebugSession session;
    try {
        session = SparkBatchDebugSession.factoryByAuth(getSparkJobUrl(submitModel), submitModel.getAdvancedConfigModel()).open().verifyCertificate();
    } catch (final Exception e) {
        final ExecutionException exp = new ExecutionException("Failed to create SSH session for debugging. " + ExceptionUtils.getRootCauseMessage(e));
        EventUtil.logErrorClassNameOnlyWithComplete(operation, ErrorType.systemError, exp, null, null);
        throw exp;
    }
    final Project project = submitModel.getProject();
    final ExecutionManager executionManager = ExecutionManager.getInstance(project);
    final IdeaSchedulers schedulers = new IdeaSchedulers(project);
    final PublishSubject<SparkBatchJobSubmissionEvent> debugEventSubject = PublishSubject.create();
    final ISparkBatchDebugJob sparkDebugBatch = (ISparkBatchDebugJob) submissionState.getSparkBatch().clone();
    final PublishSubject<SparkLogLine> ctrlSubject = (PublishSubject<SparkLogLine>) sparkDebugBatch.getCtrlSubject();
    final SparkBatchJobRemoteDebugProcess driverDebugProcess = new SparkBatchJobRemoteDebugProcess(schedulers, session, sparkDebugBatch, artifactPath, submitModel.getSubmissionParameter().getMainClassName(), submitModel.getAdvancedConfigModel(), ctrlSubject);
    final SparkBatchJobDebugProcessHandler driverDebugHandler = new SparkBatchJobDebugProcessHandler(project, driverDebugProcess, debugEventSubject);
    // Compiled once per execution rather than for every console line passing through the message filter
    final Pattern launchingContainerPattern = Pattern.compile("Launching container (\\w+).* on host ([a-zA-Z_0-9-.]+)", Pattern.CASE_INSENSITIVE);
    // Prepare an independent submission console
    final ConsoleViewImpl submissionConsole = new ConsoleViewImpl(project, true);
    final RunContentDescriptor submissionDesc = new RunContentDescriptor(submissionConsole, driverDebugHandler, submissionConsole.getComponent(), String.format("Submit %s to cluster %s", submitModel.getSubmissionParameter().getMainClassName(), submitModel.getSubmissionParameter().getClusterName()));
    // Show the submission console view
    executionManager.getContentManager().showRunContent(environment.getExecutor(), submissionDesc);
    // Use the submission console to display the deployment ctrl message
    final Subscription jobSubscription = ctrlSubject.subscribe(typedMessage -> {
        final String line = typedMessage.getRawLog() + "\n";
        // Message types without a case below are not printed to the submission console
        switch(typedMessage.getMessageInfoType()) {
            case Error:
                submissionConsole.print(line, ConsoleViewContentType.ERROR_OUTPUT);
                break;
            case Info:
                submissionConsole.print(line, ConsoleViewContentType.NORMAL_OUTPUT);
                break;
            case Log:
                submissionConsole.print(line, ConsoleViewContentType.SYSTEM_OUTPUT);
                break;
            case Warning:
                submissionConsole.print(line, ConsoleViewContentType.LOG_WARNING_OUTPUT);
                break;
        }
    }, err -> {
        // The control stream failed: show the root cause, reject the driver promise and report telemetry
        submissionConsole.print(ExceptionUtils.getRootCauseMessage(err), ConsoleViewContentType.ERROR_OUTPUT);
        final String errMsg = "The Spark job remote debug is cancelled due to " + ExceptionUtils.getRootCauseMessage(err);
        jobDriverEnvReady.setError(errMsg);
        EventUtil.logErrorClassNameOnlyWithComplete(operation, ErrorType.systemError, new UncheckedExecutionException(errMsg, err), null, null);
    }, () -> {
        // The control stream completed: only treated as a cancellation when termination was requested
        if (Optional.ofNullable(driverDebugHandler.getUserData(ProcessHandler.TERMINATION_REQUESTED)).orElse(false)) {
            final String errMsg = "The Spark job remote debug is cancelled by user.";
            jobDriverEnvReady.setError(errMsg);
            final Map<String, String> props = ImmutableMap.of("isDebugCancelled", "true");
            EventUtil.logErrorClassNameOnlyWithComplete(operation, ErrorType.userError, new ExecutionException(errMsg), props, null);
        }
    });
    // Call after completed or error
    debugEventSubject.subscribeOn(Schedulers.io()).doAfterTerminate(session::close).subscribe(debugEvent -> {
        try {
            if (debugEvent instanceof SparkBatchRemoteDebugHandlerReadyEvent) {
                final SparkBatchRemoteDebugHandlerReadyEvent handlerReadyEvent = (SparkBatchRemoteDebugHandlerReadyEvent) debugEvent;
                final SparkBatchDebugJobJdbPortForwardedEvent jdbReadyEvent = handlerReadyEvent.getJdbPortForwardedEvent();
                // Nothing to attach to without a locally forwarded JDB port
                if (!jdbReadyEvent.getLocalJdbForwardedPort().isPresent()) {
                    return;
                }
                final int localPort = jdbReadyEvent.getLocalJdbForwardedPort().get();
                final ExecutionEnvironment forkEnv = forkEnvironment(environment, jdbReadyEvent.getRemoteHost().orElse("unknown"), jdbReadyEvent.isDriver());
                final RunProfile runProfile = forkEnv.getRunProfile();
                if (!(runProfile instanceof LivySparkBatchJobRunConfiguration)) {
                    ctrlSubject.onError(new UnsupportedOperationException("Only supports LivySparkBatchJobRunConfiguration type, but got type " + runProfile.getClass().getCanonicalName()));
                    return;
                }
                // Reuse the driver's Spark batch job
                ((LivySparkBatchJobRunConfiguration) runProfile).setSparkRemoteBatch(sparkDebugBatch);
                // The driver keeps using the submission state, an executor gets the state of the forked environment
                final SparkBatchRemoteDebugState forkState = jdbReadyEvent.isDriver() ? submissionState : (SparkBatchRemoteDebugState) forkEnv.getState();
                if (forkState == null) {
                    return;
                }
                // Set the debug connection to localhost and local forwarded port to the state
                forkState.setRemoteConnection(new RemoteConnection(true, "localhost", Integer.toString(localPort), false));
                // Prepare the debug tab console view UI
                SparkJobLogConsoleView jobOutputView = new SparkJobLogConsoleView(project);
                // Get YARN container log URL port (blocks until the value is available)
                int containerLogUrlPort = ((SparkBatchRemoteDebugJob) driverDebugProcess.getSparkJob()).getYarnContainerLogUrlPort().toBlocking().single();
                // Parse container ID and host URL from driver console view
                jobOutputView.getSecondaryConsoleView().addMessageFilter((line, entireLength) -> {
                    Matcher matcher = launchingContainerPattern.matcher(line);
                    while (matcher.find()) {
                        String containerId = matcher.group(1);
                        // TODO: get port from somewhere else rather than hard code here
                        URI hostUri = URI.create(String.format("http://%s:%d", matcher.group(2), containerLogUrlPort));
                        debugEventSubject.onNext(new SparkBatchJobExecutorCreatedEvent(hostUri, containerId));
                    }
                    // The filter is only used to detect launched containers; it yields no filter result
                    return null;
                });
                jobOutputView.attachToProcess(handlerReadyEvent.getDebugProcessHandler());
                ExecutionResult result = new DefaultExecutionResult(jobOutputView, handlerReadyEvent.getDebugProcessHandler());
                forkState.setExecutionResult(result);
                forkState.setConsoleView(jobOutputView.getSecondaryConsoleView());
                forkState.setRemoteProcessCtrlLogHandler(handlerReadyEvent.getDebugProcessHandler());
                if (jdbReadyEvent.isDriver()) {
                    // Let the debug console view to handle the control log
                    jobSubscription.unsubscribe();
                    // Resolve job driver promise, handle the driver VM attaching separately
                    jobDriverEnvReady.setResult(forkEnv);
                } else {
                    // Start Executor debugging
                    executionManager.startRunProfile(forkEnv, () -> toIdeaPromise(attachAndDebug(forkEnv, forkState)));
                }
            } else if (debugEvent instanceof SparkBatchJobExecutorCreatedEvent) {
                SparkBatchJobExecutorCreatedEvent executorCreatedEvent = (SparkBatchJobExecutorCreatedEvent) debugEvent;
                final String containerId = executorCreatedEvent.getContainerId();
                final SparkBatchRemoteDebugJob debugJob = (SparkBatchRemoteDebugJob) driverDebugProcess.getSparkJob();
                URI internalHostUri = executorCreatedEvent.getHostUri();
                // Falls back to the internal host URI when no public log URI is emitted
                URI executorLogUrl = debugJob.convertToPublicLogUri(internalHostUri).map(uri -> uri.resolve(String.format("node/containerlogs/%s/livy", containerId))).toBlocking().singleOrDefault(internalHostUri);
                // Create an Executor Debug Process
                SparkBatchJobRemoteDebugExecutorProcess executorDebugProcess = new SparkBatchJobRemoteDebugExecutorProcess(schedulers, debugJob, internalHostUri.getHost(), driverDebugProcess.getDebugSession(), executorLogUrl.toString());
                SparkBatchJobDebugProcessHandler executorDebugHandler = new SparkBatchJobDebugProcessHandler(project, executorDebugProcess, debugEventSubject);
                executorDebugHandler.getRemoteDebugProcess().start();
            }
        } catch (final ExecutionException e) {
            // Checked exceptions can't leave the subscriber: report telemetry and rethrow unchecked
            EventUtil.logErrorClassNameOnlyWithComplete(operation, ErrorType.systemError, new UncheckedExecutionException(e), null, null);
            throw new UncheckedExecutionException(e);
        }
    });
    driverDebugHandler.getRemoteDebugProcess().start();
    // Driver side execute, leverage Intellij Async Promise, to wait for the Spark app deployed
    executionManager.startRunProfile(environment, () -> jobDriverEnvReady.thenAsync(driverEnv -> toIdeaPromise(attachAndDebug(driverEnv, state))));
}
use of com.microsoft.azure.hdinsight.spark.common.log.SparkLogLine in project azure-tools-for-java by Microsoft.
the class SparkBatchJobRunner method doExecute.
/**
 * Starts the remote Spark batch job process and wires it to the run console before delegating to the
 * parent runner.
 *
 * A fresh clone of the Spark batch job is used for every run, the process handler is attached to the job
 * log console view, the shared disconnect action is initialized with the new process, and the execution
 * result, console view and control log handler are stored into the run profile state.
 *
 * @param state       the run profile state, cast to {@code SparkBatchRemoteRunProfileState}
 * @param environment the execution environment, the telemetry operation is read from its user data
 * @return the descriptor produced by the parent implementation
 * @throws ExecutionException the exception in execution
 */
@Nullable
@Override
protected RunContentDescriptor doExecute(@NotNull RunProfileState state, @NotNull ExecutionEnvironment environment) throws ExecutionException {
    final SparkBatchRemoteRunProfileState runState = (SparkBatchRemoteRunProfileState) state;
    final SparkSubmitModel model = runState.getSubmitModel();
    final Project project = model.getProject();

    // Prepare the run table console view UI
    final SparkJobLogConsoleView logConsole = new SparkJobLogConsoleView(project);

    final String artifactPath = model.getArtifactPath().orElse(null);
    assert artifactPath != null : "artifactPath should be checked in LivySparkBatchJobRunConfiguration::checkSubmissionConfigurationBeforeRun";

    // See https://github.com/microsoft/azure-tools-for-java/issues/4021: on rerun, the previous ctrlSubject
    // has already delivered "onComplete" when the earlier job finished. Cloning the Spark batch job gives
    // a new instance with everything re-initialized.
    final ISparkBatchJob clonedJob = runState.getSparkBatch().clone();
    final PublishSubject<SparkLogLine> ctrlSubject = (PublishSubject<SparkLogLine>) clonedJob.getCtrlSubject();

    final IdeaSchedulers schedulers = new IdeaSchedulers(project);
    final SparkBatchJobRemoteProcess jobProcess = new SparkBatchJobRemoteProcess(
            schedulers,
            clonedJob,
            artifactPath,
            model.getSubmissionParameter().getMainClassName(),
            ctrlSubject);
    final SparkBatchJobRunProcessHandler handler = new SparkBatchJobRunProcessHandler(
            jobProcess,
            "Package and deploy the job to Spark cluster",
            null);

    // Once attached, the console view can read the process inputStreams and display them
    logConsole.attachToProcess(handler);
    jobProcess.start();

    final Operation operation = environment.getUserData(TelemetryKeys.OPERATION);

    // IntelliJ constructs the AnAction instance itself once the class is defined; sharing that single
    // instance keeps behaviours like isEnabled() consistent
    final ActionManager actionManager = ActionManager.getInstance();
    final SparkBatchJobDisconnectAction disconnect =
            (SparkBatchJobDisconnectAction) actionManager.getAction("Actions.SparkJobDisconnect");
    disconnect.init(jobProcess, operation);

    sendTelemetryForParameters(model, operation);

    final ExecutionResult executionResult =
            new DefaultExecutionResult(logConsole, handler, Separator.getInstance(), disconnect);
    runState.setExecutionResult(executionResult);

    final ConsoleView secondaryConsole = logConsole.getSecondaryConsoleView();
    runState.setConsoleView(secondaryConsole);
    addConsoleViewFilter(jobProcess.getSparkJob(), secondaryConsole);
    runState.setRemoteProcessCtrlLogHandler(handler);

    // Control messages themselves are ignored here; the disconnect action is disabled as soon as the
    // control stream ends, whether with an error or normally
    ctrlSubject.subscribe(
            ignoredMessage -> {
            },
            failure -> disconnect.setEnabled(false),
            () -> disconnect.setEnabled(false));

    return super.doExecute(state, environment);
}
use of com.microsoft.azure.hdinsight.spark.common.log.SparkLogLine in project azure-tools-for-java by Microsoft.
the class SparkBatchJob method createBatchJob.
/**
 * Create a batch Spark job
 *
 * The job is submitted with the current submission parameter to the connection URI. On a 2xx response
 * the batch ID parsed from the response message is saved and an info line is pushed to the control
 * subject; any other response code is reported as a failure.
 *
 * @return the current instance for chain calling
 * @throws IOException the exceptions for networking connection issues related, including the
 *                     {@code UnknownServiceException} raised for an unparsable or non-2xx response
 */
private SparkBatchJob createBatchJob() throws IOException {
    if (getConnectUri() == null) {
        throw new SparkJobNotConfiguredException(
                "Can't get Spark job connection URI, "
                        + "please configure Spark cluster which the Spark job will be submitted.");
    }

    // Submit the batch job
    final HttpResponse httpResponse = this.getSubmission()
            .createBatchSparkJob(this.getConnectUri().toString(), this.getSubmissionParameter());

    // Anything outside the 2xx range is a failed submission
    final boolean accepted = httpResponse.getCode() >= 200 && httpResponse.getCode() < 300;
    if (!accepted) {
        throw new UnknownServiceException(String.format(
                "Failed to submit Spark batch job. error code: %d, type: %s, reason: %s.",
                httpResponse.getCode(),
                httpResponse.getContent(),
                httpResponse.getMessage()));
    }

    // Get the batch ID from response and save it
    final SparkSubmitResponse submitted = ObjectConvertUtils
            .convertJsonToObject(httpResponse.getMessage(), SparkSubmitResponse.class)
            .orElseThrow(() -> new UnknownServiceException("Bad spark job response: " + httpResponse.getMessage()));
    this.setBatchId(submitted.getId());

    getCtrlSubject().onNext(new SparkLogLine(TOOL, Info, "Spark Batch submission " + httpResponse.toString()));

    return this;
}
Aggregations