package io.hops.hopsworks.common.jobs.yarn;

import io.hops.hopsworks.common.dao.jobhistory.ExecutionFacade;
import io.hops.hopsworks.common.jobs.JobsMonitor;
import io.hops.hopsworks.common.jobs.execution.ExecutionUpdateController;
import io.hops.hopsworks.common.util.PayaraClusterManager;
import io.hops.hopsworks.common.util.Settings;
import io.hops.hopsworks.common.yarn.YarnClientService;
import io.hops.hopsworks.common.yarn.YarnClientWrapper;
import io.hops.hopsworks.persistence.entity.jobs.configuration.history.JobFinalStatus;
import io.hops.hopsworks.persistence.entity.jobs.configuration.history.JobState;
import io.hops.hopsworks.persistence.entity.jobs.history.Execution;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.annotation.Resource;
import javax.ejb.DependsOn;
import javax.ejb.EJB;
import javax.ejb.Singleton;
import javax.ejb.Startup;
import javax.ejb.Timeout;
import javax.ejb.Timer;
import javax.ejb.TimerConfig;
import javax.ejb.TimerService;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.exceptions.YarnException;

/**
 * Timer-driven singleton that polls YARN for every execution that is not yet finished and
 * mirrors the reported progress, state and final status onto the stored {@link Execution}.
 *
 * <p>On every tick (see {@link #yarnJobMonitor(Timer)}) the monitor:
 * <ul>
 *   <li>does nothing unless {@code payaraClusterManager.amIThePrimary()} returns {@code true};</li>
 *   <li>loads the not-finished executions that have an application id and polls each one;</li>
 *   <li>starts log copying (via {@code execFinalizer.copyLogs}) for applications whose YARN state
 *       is FAILED, FINISHED or KILLED;</li>
 *   <li>kills and finalizes applications whose status could not be polled more than
 *       {@code settings.getMaxStatusPollRetry()} times.</li>
 * </ul>
 *
 * <p>The timer callback is {@code synchronized}, and all mutable bookkeeping in this class
 * ({@link #failures}, {@code copyLogsFutures}, {@code maxStatusPollRetry}) is only touched from it.
 */
@DependsOn({"Settings"})
@Singleton
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
@Startup
public class YarnJobsMonitor implements JobsMonitor {
    private static final Logger LOGGER = Logger.getLogger(YarnJobsMonitor.class.getName());

    /** Delay before the first poll, in milliseconds. */
    private static final long INITIAL_DELAY_MS = 0L;

    /** Interval between two consecutive polls, in milliseconds. */
    private static final long POLL_INTERVAL_MS = 5000L;

    @EJB
    private Settings settings;

    @EJB
    private ExecutionFacade executionFacade;

    @EJB
    private ExecutionUpdateController executionUpdateController;

    @EJB
    private YarnExecutionFinalizer execFinalizer;

    @EJB
    private YarnMonitor yarnMonitor;

    @EJB
    private PayaraClusterManager payaraClusterManager;

    @EJB
    private YarnClientService yarnClientService;

    @Resource
    private TimerService timerService;
    private Timer timer;

    /** Refreshed from {@code settings} on every tick that has executions to poll. */
    private int maxStatusPollRetry;

    /**
     * Number of failed status polls per application id (as returned by {@code Execution.getAppId()}).
     * NOTE(review): an entry is only removed once the execution leaves monitoring; it is not reset
     * by a later successful poll, so the count is cumulative rather than consecutive.
     */
    Map<String, Integer> failures = new HashMap<>();

    /** Pending log-copy tasks, keyed by application id; used to avoid starting a copy twice. */
    private final Map<ApplicationId, Future<Execution>> copyLogsFutures = new HashMap<>();

    /** Creates the non-persistent interval timer that drives {@link #yarnJobMonitor(Timer)}. */
    @PostConstruct
    public void init() {
        this.timer = this.timerService.createIntervalTimer(
            INITIAL_DELAY_MS, POLL_INTERVAL_MS, new TimerConfig("Yarn job monitor timer", false));
    }

    /** Cancels the polling timer, if one was created. */
    @PreDestroy
    public void destroy() {
        if (this.timer != null) {
            this.timer.cancel();
        }
    }

    /**
     * Timer callback: polls YARN once for every not-finished execution that has an application id.
     *
     * <p>Any {@link Exception} raised while monitoring is logged and swallowed so that the next
     * tick can run; the YARN client obtained for this tick is always handed back to
     * {@code yarnClientService.closeYarnClient}.
     *
     * @param timer the timer that fired (unused)
     */
    @Timeout
    public synchronized void yarnJobMonitor(Timer timer) {
        if (!this.payaraClusterManager.amIThePrimary()) {
            return;
        }
        YarnClientWrapper yarnClientWrapper = null;
        try {
            yarnClientWrapper = this.yarnClientService.getYarnClientSuper();

            List<Execution> notFinished = this.executionFacade.findNotFinished();
            if (notFinished == null || notFinished.isEmpty()) {
                return;
            }

            // Index by application id; executions without one cannot be polled.
            Map<String, Execution> executionsByAppId = new HashMap<>();
            for (Execution execution : notFinished) {
                if (execution.getAppId() != null) {
                    executionsByAppId.put(execution.getAppId(), execution);
                }
            }

            this.maxStatusPollRetry = this.settings.getMaxStatusPollRetry();

            // internalMonitor returns null once an execution no longer needs polling.
            List<String> doneAppIds = new ArrayList<>();
            for (Map.Entry<String, Execution> entry : executionsByAppId.entrySet()) {
                YarnClient yarnClient = yarnClientWrapper.getYarnClient();
                ApplicationId applicationId = ApplicationId.fromString(entry.getKey());
                if (internalMonitor(yarnClient, applicationId, entry.getValue()) == null) {
                    doneAppIds.add(entry.getKey());
                }
            }
            for (String appId : doneAppIds) {
                this.failures.remove(appId);
            }

            // Bookkeeping: forget log-copy tasks that have completed.
            this.copyLogsFutures.entrySet().removeIf(entry -> entry.getValue().isDone());
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Error while monitoring jobs", e);
        } finally {
            this.yarnClientService.closeYarnClient(yarnClientWrapper);
        }
    }

    /**
     * Polls YARN for a single application and updates the corresponding execution.
     *
     * @param yarnClient    client used for all YARN calls of this poll
     * @param applicationId YARN id of the application backing {@code execution}
     * @param execution     the execution to update
     * @return the (possibly updated) execution if it should keep being monitored, or {@code null}
     *         if log copying was started for it or it was finalized after too many poll failures
     */
    private Execution internalMonitor(YarnClient yarnClient, ApplicationId applicationId, Execution execution) {
        try {
            // Query YARN first, then persist, so a failed query leaves the execution untouched.
            YarnApplicationState applicationState = this.yarnMonitor.getApplicationState(yarnClient, applicationId);
            JobFinalStatus finalStatus =
                JobFinalStatus.getJobFinalStatus(this.yarnMonitor.getFinalApplicationStatus(yarnClient, applicationId));
            JobState jobState = JobState.getJobState(applicationState);
            float progress = this.yarnMonitor.getProgress(yarnClient, applicationId);

            execution = updateProgress(progress, execution);
            execution = updateState(jobState, execution);
            execution = updateFinalStatus(finalStatus, execution);

            if (isTerminal(applicationState) && !this.copyLogsFutures.containsKey(applicationId)) {
                execution = this.executionFacade.updateState(execution, JobState.AGGREGATING_LOGS);
                this.copyLogsFutures.put(applicationId, this.execFinalizer.copyLogs(execution));
                return null;
            }
        } catch (IOException | YarnException e) {
            int attempts = this.failures.merge(execution.getAppId(), 1, Integer::sum);
            LOGGER.log(Level.WARNING, "Failed to get application state for execution " + execution
                + ". Tried " + attempts + " time(s).", e);
        }

        Integer failureCount = this.failures.get(execution.getAppId());
        if (failureCount == null || failureCount <= this.maxStatusPollRetry) {
            return execution;
        }

        // Too many failed polls: try to kill the application and finalize the execution.
        try {
            LOGGER.log(Level.SEVERE, "Killing application, {0}, because unable to poll for status.", execution);
            this.yarnMonitor.cancelJob(yarnClient, applicationId);
            execution = updateProgress(0.0f, updateFinalStatus(JobFinalStatus.KILLED, execution));
            this.execFinalizer.finalizeExecution(execution, JobState.KILLED);
        } catch (YarnException | IOException e) {
            LOGGER.log(Level.SEVERE, "Failed to cancel execution, " + execution
                + " after failing to poll for status.", e);
            this.execFinalizer.finalizeExecution(execution, JobState.FRAMEWORK_FAILURE);
        }
        return null;
    }

    /** Returns whether {@code state} is one of FAILED, FINISHED or KILLED. */
    private static boolean isTerminal(YarnApplicationState state) {
        return state == YarnApplicationState.FAILED
            || state == YarnApplicationState.FINISHED
            || state == YarnApplicationState.KILLED;
    }

    /**
     * Delegates to {@code executionUpdateController.updateProgress}.
     *
     * @param f         the new progress value
     * @param execution the execution to update
     * @return the execution returned by the controller
     */
    @Override // io.hops.hopsworks.common.jobs.JobsMonitor
    public Execution updateProgress(float f, Execution execution) {
        return this.executionUpdateController.updateProgress(f, execution);
    }

    /**
     * Delegates to {@code executionUpdateController.updateState}.
     *
     * @param jobState  the new job state
     * @param execution the execution to update
     * @return the execution returned by the controller
     */
    @Override // io.hops.hopsworks.common.jobs.JobsMonitor
    public Execution updateState(JobState jobState, Execution execution) {
        return this.executionUpdateController.updateState(jobState, execution);
    }

    /** Delegates to {@code executionUpdateController.updateFinalStatusAndSendAlert}. */
    private Execution updateFinalStatus(JobFinalStatus jobFinalStatus, Execution execution) {
        return this.executionUpdateController.updateFinalStatusAndSendAlert(jobFinalStatus, execution);
    }
}
