Search in sources :

Example 1 with RawJobMetrics

use of com.hazelcast.jet.impl.metrics.RawJobMetrics in project hazelcast by hazelcast.

The method completeJob of the class JobRepository.

/**
 * Stores a {@link JobResult} for the given job and then calls {@code deleteJob}
 * for it.
 * <p>
 * When terminal metrics are supplied they are stored first, on a best-effort
 * basis: any exception from that step is logged and ignored. Storing the
 * result itself is retried after a one-second pause until it succeeds; the
 * only exception that escapes the retry loop is a
 * {@link HazelcastInstanceNotActiveException} observed while the local
 * instance's lifecycle service reports it is not running.
 *
 * @param masterContext   source of the job id, job config and creation time
 * @param terminalMetrics metrics to store under the job id, or {@code null} to skip
 * @param error           failure passed to {@code toErrorMsg}, may be {@code null}
 * @param completionTime  completion time recorded in the result
 * @throws JobNotFoundException  if the JobRecord is not found
 * @throws IllegalStateException if the JobResult is already present
 */
void completeJob(@Nonnull MasterContext masterContext, @Nullable List<RawJobMetrics> terminalMetrics, @Nullable Throwable error, long completionTime) {
    final long jobId = masterContext.jobId();
    final JobResult jobResult = new JobResult(
            jobId,
            masterContext.jobRecord().getConfig(),
            masterContext.jobRecord().getCreationTime(),
            completionTime,
            toErrorMsg(error));

    if (terminalMetrics != null) {
        // Best effort only: losing the metrics must not prevent storing the result.
        try {
            if (jobMetrics.get().put(jobId, terminalMetrics) != null) {
                logger.warning("Overwriting job metrics for job " + jobResult);
            }
        } catch (Exception e) {
            logger.warning("Storing the job metrics failed, ignoring: " + e, e);
        }
    }

    final long retryTimeoutSeconds = 1;
    boolean stored = false;
    while (!stored) {
        try {
            jobResults.get().set(jobId, jobResult);
            stored = true;
        } catch (Exception e) {
            LifecycleService lifecycle = instance.getLifecycleService();
            if (e instanceof HazelcastInstanceNotActiveException) {
                // Retrying is pointless once the local instance has been shut down.
                if (!lifecycle.isRunning()) {
                    throw e;
                }
            }
            // Anything else: log, pause, and try again.
            logger.warning("Failed to store JobResult, will retry in " + retryTimeoutSeconds + " seconds: " + e, e);
            LockSupport.parkNanos(SECONDS.toNanos(retryTimeoutSeconds));
        }
    }

    deleteJob(jobId);
}
Also used : HazelcastInstanceNotActiveException(com.hazelcast.core.HazelcastInstanceNotActiveException) LifecycleService(com.hazelcast.core.LifecycleService) RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics) JobConfig(com.hazelcast.jet.config.JobConfig) HazelcastInstanceNotActiveException(com.hazelcast.core.HazelcastInstanceNotActiveException) URISyntaxException(java.net.URISyntaxException) FileNotFoundException(java.io.FileNotFoundException) JetException(com.hazelcast.jet.JetException) JobNotFoundException(com.hazelcast.jet.core.JobNotFoundException) IOException(java.io.IOException)

Example 2 with RawJobMetrics

use of com.hazelcast.jet.impl.metrics.RawJobMetrics in project hazelcast by hazelcast.

The method toJobMetrics of the class JobMetricsUtil.

/**
 * Builds a {@link JobMetrics} instance from a list of raw metrics.
 * <p>
 * Entries whose blob is {@code null} are skipped. For every other entry the
 * shared consumer's timestamp is set to the entry's timestamp and the blob
 * is handed to {@code MetricsCompressor.extractMetrics}. The consumer is
 * created lazily, on the first entry that has a blob.
 *
 * @param rawJobMetrics the raw metrics to convert
 * @return {@code JobMetrics.empty()} if no entry had a blob, otherwise
 *         {@code JobMetrics.of} applied to the consumer's collected metrics
 */
static JobMetrics toJobMetrics(List<RawJobMetrics> rawJobMetrics) {
    JobMetricsConsumer sink = null;
    for (RawJobMetrics raw : rawJobMetrics) {
        if (raw.getBlob() != null) {
            if (sink == null) {
                sink = new JobMetricsConsumer();
            }
            sink.timestamp = raw.getTimestamp();
            MetricsCompressor.extractMetrics(raw.getBlob(), sink);
        }
    }
    if (sink == null) {
        return JobMetrics.empty();
    }
    return JobMetrics.of(sink.metrics);
}
Also used : RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics)

Example 3 with RawJobMetrics

use of com.hazelcast.jet.impl.metrics.RawJobMetrics in project hazelcast by hazelcast.

The method runLightJob of the class JobExecutionService.

/**
 * Registers an execution context for a light job, initializes it with the
 * given plan and begins the execution.
 * <p>
 * Cluster information is verified and the service's running state is checked
 * before anything is registered. If initialization throws, the execution is
 * completed with a {@link CancellationException} and the original throwable
 * is rethrown to the caller.
 *
 * @param jobId                        id of the light job; asserted to equal {@code executionId}
 * @param executionId                  id of the execution
 * @param coordinator                  address of the coordinator
 * @param coordinatorMemberListVersion member list version passed to the cluster verification
 * @param participants                 members taking part in the execution
 * @param plan                         execution plan used to initialize the context
 * @return the future returned by {@code beginExecution0}
 */
public CompletableFuture<RawJobMetrics> runLightJob(long jobId, long executionId, Address coordinator, int coordinatorMemberListVersion, Set<MemberInfo> participants, ExecutionPlan plan) {
    assert executionId == jobId : "executionId(" + idToString(executionId) + ") != jobId(" + idToString(jobId) + ")";
    verifyClusterInformation(jobId, executionId, coordinator, coordinatorMemberListVersion, participants);
    failIfNotRunning();

    final ExecutionContext context;
    synchronized (mutex) {
        addExecutionContextJobId(jobId, executionId, coordinator);
        context = executionContexts.computeIfAbsent(
                executionId,
                key -> new ExecutionContext(nodeEngine, jobId, executionId, true));
    }

    try {
        Set<Address> participantAddresses = participants.stream()
                .map(MemberInfo::getAddress)
                .collect(toSet());
        ClassLoader jobClassLoader = jobClassloaderService.getClassLoader(jobId);
        // We don't create the CL for light jobs.
        assert jobClassloaderService.getClassLoader(jobId) == null;
        doWithClassLoader(jobClassLoader, () -> context.initialize(coordinator, participantAddresses, plan));
    } catch (Throwable e) {
        // Initialization failed: complete the registered execution before propagating.
        completeExecution(context, new CancellationException());
        throw e;
    }

    // initial log entry with all of jobId, jobName, executionId
    if (logger.isFineEnabled()) {
        String quotedJobName = "null";
        if (context.jobName() != null) {
            quotedJobName = '\'' + context.jobName() + '\'';
        }
        String message = "Execution plan for light job ID=" + idToString(jobId)
                + ", jobName=" + quotedJobName
                + ", executionId=" + idToString(executionId)
                + " initialized, will start the execution";
        logger.fine(message);
    }
    return beginExecution0(context, false);
}
Also used : Address(com.hazelcast.cluster.Address) HazelcastInstanceNotActiveException(com.hazelcast.core.HazelcastInstanceNotActiveException) InvocationFuture(com.hazelcast.spi.impl.operationservice.impl.InvocationFuture) ScheduledFuture(java.util.concurrent.ScheduledFuture) Member(com.hazelcast.cluster.Member) UnaryOperator(java.util.function.UnaryOperator) JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException) JetDelegatingClassLoader(com.hazelcast.jet.impl.deployment.JetDelegatingClassLoader) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) MwCounter(com.hazelcast.internal.util.counters.MwCounter) Map(java.util.Map) DynamicMetricsProvider(com.hazelcast.internal.metrics.DynamicMetricsProvider) Counter(com.hazelcast.internal.util.counters.Counter) Collectors.toSet(java.util.stream.Collectors.toSet) ExecutionContext(com.hazelcast.jet.impl.execution.ExecutionContext) RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) CancellationException(java.util.concurrent.CancellationException) Probe(com.hazelcast.internal.metrics.Probe) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) TargetNotMemberException(com.hazelcast.spi.exception.TargetNotMemberException) Objects(java.util.Objects) SenderReceiverKey(com.hazelcast.jet.impl.execution.ExecutionContext.SenderReceiverKey) List(java.util.List) Util.idToString(com.hazelcast.jet.Util.idToString) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) MetricNames(com.hazelcast.jet.core.metrics.MetricNames) Entry(java.util.Map.Entry) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) LoggingUtil(com.hazelcast.jet.impl.util.LoggingUtil) MetricsRegistry(com.hazelcast.internal.metrics.MetricsRegistry) NANOSECONDS(java.util.concurrent.TimeUnit.NANOSECONDS) MembershipManager(com.hazelcast.internal.cluster.impl.MembershipManager) 
MetricsCompressor(com.hazelcast.internal.metrics.impl.MetricsCompressor) Util.doWithClassLoader(com.hazelcast.jet.impl.util.Util.doWithClassLoader) SenderTasklet(com.hazelcast.jet.impl.execution.SenderTasklet) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) MINUTES(java.util.concurrent.TimeUnit.MINUTES) Function(java.util.function.Function) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) EXECUTION(com.hazelcast.jet.impl.JobClassLoaderService.JobPhase.EXECUTION) MetricDescriptor(com.hazelcast.internal.metrics.MetricDescriptor) Collections.newSetFromMap(java.util.Collections.newSetFromMap) ILogger(com.hazelcast.logging.ILogger) Operation(com.hazelcast.spi.impl.operationservice.Operation) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) TaskletExecutionService(com.hazelcast.jet.impl.execution.TaskletExecutionService) ClusterServiceImpl(com.hazelcast.internal.cluster.impl.ClusterServiceImpl) Nonnull(javax.annotation.Nonnull) CheckLightJobsOperation(com.hazelcast.jet.impl.operation.CheckLightJobsOperation) Nullable(javax.annotation.Nullable) NodeEngineImpl(com.hazelcast.spi.impl.NodeEngineImpl) MemberLeftException(com.hazelcast.core.MemberLeftException) MetricsCollectionContext(com.hazelcast.internal.metrics.MetricsCollectionContext) MetricsCollector(com.hazelcast.internal.metrics.collectors.MetricsCollector) RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics) MetricTags(com.hazelcast.jet.core.metrics.MetricTags) TriggerMemberListPublishOp(com.hazelcast.internal.cluster.impl.operations.TriggerMemberListPublishOp) ExceptionUtil.peel(com.hazelcast.jet.impl.util.ExceptionUtil.peel) Util.jobIdAndExecutionId(com.hazelcast.jet.impl.util.Util.jobIdAndExecutionId) Util(com.hazelcast.jet.Util) SECONDS(java.util.concurrent.TimeUnit.SECONDS) 
ExecutionContext(com.hazelcast.jet.impl.execution.ExecutionContext) Address(com.hazelcast.cluster.Address) CancellationException(java.util.concurrent.CancellationException) JetDelegatingClassLoader(com.hazelcast.jet.impl.deployment.JetDelegatingClassLoader) Util.doWithClassLoader(com.hazelcast.jet.impl.util.Util.doWithClassLoader)

Example 4 with RawJobMetrics

use of com.hazelcast.jet.impl.metrics.RawJobMetrics in project hazelcast by hazelcast.

The method onStartExecutionComplete of the class MasterJobContext.

/**
 * Handles the completion of the start-execution step: records the metrics
 * found in the member responses and finalizes the job.
 * <p>
 * If the job status is neither {@code STARTING} nor {@code RUNNING}, the
 * incoming error is replaced with an {@link IllegalStateException}. When the
 * error is a {@link JobTerminateRequestedException} whose mode is with a
 * terminal snapshot, finalization is deferred until the terminal snapshot
 * future completes; otherwise the job is finalized immediately.
 *
 * @param error     failure of the execution, or {@code null}
 * @param responses per-member responses; values that are {@link RawJobMetrics}
 *                  are collected and passed to {@code setJobMetrics}
 */
private void onStartExecutionComplete(Throwable error, Collection<Entry<MemberInfo, Object>> responses) {
    JobStatus currentStatus = mc.jobStatus();
    if (!(currentStatus == STARTING || currentStatus == RUNNING)) {
        logCannotComplete(error);
        error = new IllegalStateException("Job coordination failed");
    }

    List<RawJobMetrics> memberMetrics = responses.stream()
            .map(Entry::getValue)
            .filter(RawJobMetrics.class::isInstance)
            .map(RawJobMetrics.class::cast)
            .collect(Collectors.toList());
    setJobMetrics(memberMetrics);

    boolean terminatedWithSnapshot = error instanceof JobTerminateRequestedException
            && ((JobTerminateRequestedException) error).mode().isWithTerminalSnapshot();
    if (terminatedWithSnapshot) {
        final Throwable failure = error;
        // The terminal snapshot on members is always completed before replying to StartExecutionOp.
        // However, the response to snapshot operations can be processed after the response to
        // StartExecutionOp, so wait for that too.
        mc.snapshotContext().terminalSnapshotFuture()
                .whenCompleteAsync(withTryCatch(logger, (ignoredResult, ignoredError) -> finalizeJob(failure)));
        return;
    }

    // If the StartExecutionOperation didn't find the execution, it means that it was cancelled.
    // This cancellation can be because the master cancelled it. If that's the case, convert the
    // exception to JobTerminateRequestedException.
    // The cancellation can also happen if some participant left and the target cancelled the
    // execution locally in JobExecutionService.onMemberRemoved(). We keep this (and possibly
    // other) exceptions as they are and let the execution complete with failure.
    if (error instanceof ExecutionNotFoundException && requestedTerminationMode != null) {
        error = new JobTerminateRequestedException(requestedTerminationMode).initCause(error);
    }
    finalizeJob(error);
}
Also used : JobStatus(com.hazelcast.jet.core.JobStatus) Address(com.hazelcast.cluster.Address) SUSPEND(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate.SUSPEND) NOT_RUNNING(com.hazelcast.jet.core.JobStatus.NOT_RUNNING) GetLocalJobMetricsOperation(com.hazelcast.jet.impl.operation.GetLocalJobMetricsOperation) CompletableFuture.completedFuture(java.util.concurrent.CompletableFuture.completedFuture) NonCompletableFuture(com.hazelcast.jet.impl.util.NonCompletableFuture) ExceptionUtil.isTopologyException(com.hazelcast.jet.impl.util.ExceptionUtil.isTopologyException) JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) RESTART(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate.RESTART) JetDelegatingClassLoader(com.hazelcast.jet.impl.deployment.JetDelegatingClassLoader) TerminatedWithSnapshotException(com.hazelcast.jet.impl.exception.TerminatedWithSnapshotException) Collectors.toMap(java.util.stream.Collectors.toMap) Functions.entryKey(com.hazelcast.function.Functions.entryKey) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) SUSPENDED(com.hazelcast.jet.core.JobStatus.SUSPENDED) DAG(com.hazelcast.jet.core.DAG) JobStatus(com.hazelcast.jet.core.JobStatus) ExceptionUtil(com.hazelcast.jet.impl.util.ExceptionUtil) JobMetrics(com.hazelcast.jet.core.metrics.JobMetrics) CancellationException(java.util.concurrent.CancellationException) CANCEL_GRACEFUL(com.hazelcast.jet.impl.TerminationMode.CANCEL_GRACEFUL) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Set(java.util.Set) UUID(java.util.UUID) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Collectors(java.util.stream.Collectors) CANCEL_FORCEFUL(com.hazelcast.jet.impl.TerminationMode.CANCEL_FORCEFUL) Objects(java.util.Objects) 
Util(com.hazelcast.jet.impl.util.Util) List(java.util.List) Util.idToString(com.hazelcast.jet.Util.idToString) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) MetricNames(com.hazelcast.jet.core.metrics.MetricNames) Entry(java.util.Map.Entry) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) JetDisabledException(com.hazelcast.jet.impl.exception.JetDisabledException) LoggingUtil(com.hazelcast.jet.impl.util.LoggingUtil) ExecutionPlanBuilder.createExecutionPlans(com.hazelcast.jet.impl.execution.init.ExecutionPlanBuilder.createExecutionPlans) Collectors.partitioningBy(java.util.stream.Collectors.partitioningBy) TerminateExecutionOperation(com.hazelcast.jet.impl.operation.TerminateExecutionOperation) ExceptionUtil.isRestartableException(com.hazelcast.jet.impl.util.ExceptionUtil.isRestartableException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) LoggingUtil.logFinest(com.hazelcast.jet.impl.util.LoggingUtil.logFinest) Util.doWithClassLoader(com.hazelcast.jet.impl.util.Util.doWithClassLoader) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionService(com.hazelcast.spi.impl.executionservice.ExecutionService) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) Function(java.util.function.Function) Supplier(java.util.function.Supplier) Util.formatJobDuration(com.hazelcast.jet.impl.util.Util.formatJobDuration) ActionAfterTerminate(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) ArrayList(java.util.ArrayList) JetException(com.hazelcast.jet.JetException) HashSet(java.util.HashSet) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) COORDINATOR(com.hazelcast.jet.impl.JobClassLoaderService.JobPhase.COORDINATOR) ILogger(com.hazelcast.logging.ILogger) 
SnapshotValidator.validateSnapshot(com.hazelcast.jet.impl.SnapshotValidator.validateSnapshot) ExceptionUtil.rethrow(com.hazelcast.jet.impl.util.ExceptionUtil.rethrow) Operation(com.hazelcast.spi.impl.operationservice.Operation) Util.entry(com.hazelcast.jet.Util.entry) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) BiConsumer(java.util.function.BiConsumer) MembersView(com.hazelcast.internal.cluster.impl.MembersView) LocalMemberResetException(com.hazelcast.core.LocalMemberResetException) RESTART_GRACEFUL(com.hazelcast.jet.impl.TerminationMode.RESTART_GRACEFUL) Edge(com.hazelcast.jet.core.Edge) Version(com.hazelcast.version.Version) EXPORTED_SNAPSHOTS_PREFIX(com.hazelcast.jet.impl.JobRepository.EXPORTED_SNAPSHOTS_PREFIX) Nonnull(javax.annotation.Nonnull) Tuple2(com.hazelcast.jet.datamodel.Tuple2) Nullable(javax.annotation.Nullable) Job(com.hazelcast.jet.Job) Measurement(com.hazelcast.jet.core.metrics.Measurement) SUSPENDED_EXPORTING_SNAPSHOT(com.hazelcast.jet.core.JobStatus.SUSPENDED_EXPORTING_SNAPSHOT) Util.toList(com.hazelcast.jet.impl.util.Util.toList) RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics) MetricTags(com.hazelcast.jet.core.metrics.MetricTags) NONE(com.hazelcast.jet.config.ProcessingGuarantee.NONE) Consumer(java.util.function.Consumer) Vertex(com.hazelcast.jet.core.Vertex) Tuple2.tuple2(com.hazelcast.jet.datamodel.Tuple2.tuple2) CustomClassLoadedObject.deserializeWithCustomClassLoader(com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader) ExceptionUtil.peel(com.hazelcast.jet.impl.util.ExceptionUtil.peel) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) Collections(java.util.Collections) IMap(com.hazelcast.map.IMap) Edge.between(com.hazelcast.jet.core.Edge.between) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics) 
JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException)

Aggregations

RawJobMetrics (com.hazelcast.jet.impl.metrics.RawJobMetrics)4 Address (com.hazelcast.cluster.Address)2 HazelcastInstanceNotActiveException (com.hazelcast.core.HazelcastInstanceNotActiveException)2 MemberInfo (com.hazelcast.internal.cluster.MemberInfo)2 Util.idToString (com.hazelcast.jet.Util.idToString)2 TopologyChangedException (com.hazelcast.jet.core.TopologyChangedException)2 MetricNames (com.hazelcast.jet.core.metrics.MetricNames)2 MetricTags (com.hazelcast.jet.core.metrics.MetricTags)2 JetDelegatingClassLoader (com.hazelcast.jet.impl.deployment.JetDelegatingClassLoader)2 ExecutionNotFoundException (com.hazelcast.jet.impl.exception.ExecutionNotFoundException)2 JobTerminateRequestedException (com.hazelcast.jet.impl.exception.JobTerminateRequestedException)2 ExecutionPlan (com.hazelcast.jet.impl.execution.init.ExecutionPlan)2 ExceptionUtil.peel (com.hazelcast.jet.impl.util.ExceptionUtil.peel)2 ExceptionUtil.withTryCatch (com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch)2 LoggingUtil (com.hazelcast.jet.impl.util.LoggingUtil)2 Util.doWithClassLoader (com.hazelcast.jet.impl.util.Util.doWithClassLoader)2 ILogger (com.hazelcast.logging.ILogger)2 Operation (com.hazelcast.spi.impl.operationservice.Operation)2 ArrayList (java.util.ArrayList)2 Collection (java.util.Collection)2