package org.apache.hadoop.yarn.server.nodemanager.containermanager.sharedresourcemonitor;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.collections.MapUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.sharedresource.ProcfsBasedProcessTreeWithSharedResource;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.sharedresource.SharedResourceConstants;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.sharedresource.gpu.GpuHealth;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.sharedresource.gpu.GpuMemoryStat;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.sharedresource.gpu.SharedGpuAPI;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ChangeMonitoringContainerResourceEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStartMonitoringEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetricsForSharedResource;
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/hadoop/yarn/server/nodemanager/containermanager/sharedresourcemonitor/SharedResourceContainersMonitorImpl.class */
public class SharedResourceContainersMonitorImpl extends ContainersMonitorImpl {
    // General-purpose logger for this monitor.
    private static final Logger LOG = LoggerFactory.getLogger(SharedResourceContainersMonitorImpl.class);
    // Separate logger named "<class name>.audit"; the monitoring thread writes per-container usage lines to it.
    private static final Logger AUDITLOG = LoggerFactory.getLogger(SharedResourceContainersMonitorImpl.class.getName() + ".audit");
    // Configuration key and default for GPU memory checking.
    // NOTE(review): the code that reads this key is not in the visible part of the file - confirm usage.
    private static final String NM_GPUS_MEM_CHECK_ENABLED = "yarn.nodemanager.gpus-mem-check-enabled";
    private static final boolean DEFAULT_NM_GPUS_MEM_CHECK_ENABLED = false;
    // Configuration key and default for the dynamic memory usage threshold.
    // NOTE(review): presumably feeds usableMemoryThreshold / usableGpuMemoryThreshold - confirm against the init code.
    private static final String NM_CONTAINER_DYNA_MEM_USAGE_THRESHOLD = "yarn.nodemanager.dynamic.memory.usage.threshold";
    private static final float DEFAULT_NM_CONTAINER_DYNA_MEM_USAGE_THRESHOLD = 0.0f;
    // Source of GPU memory statistics, device indexes and device health.
    private SharedGpuAPI gpuAPI;
    // Process-tree info of tracked docker containers, keyed by container id.
    private Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> trackingDockerContainers;
    // NOTE(review): presumably backs isGpuMemCheckEnabled() - the accessor is not visible here.
    private boolean gpuMemCheckEnabled;
    // Sink for the per-device GPU health array published by the monitoring thread.
    private NodeManagerMetricsForSharedResource nodeMetrics;
    // Node-wide physical memory usage above which over-limit containers are selected for killing.
    private long usableMemoryThreshold;
    // Kill candidates for the physical-memory threshold; cleared at the start of every monitoring pass.
    private Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> pmemOverLimitedContainers;
    // Kill candidates for the GPU-memory threshold, grouped by GPU device index; inner maps are cleared every pass.
    private Map<Integer, Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo>> gmemOverLimitedContainers;
    // Per GPU device index: usage above which over-limit containers on that device are selected for killing.
    private Map<Integer, Long> usableGpuMemoryThreshold;
    // Per GPU device index: GPU memory attributed to containers in the current pass (reset to 0 every pass).
    private Map<Integer, Long> gmemStillInUsage;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.apache.hadoop.yarn.server.nodemanager.containermanager.sharedresourcemonitor.SharedResourceContainersMonitorImpl$1, reason: invalid class name */
    /* loaded from: input_file:org/apache/hadoop/yarn/server/nodemanager/containermanager/sharedresourcemonitor/SharedResourceContainersMonitorImpl$1.class */
    /**
     * Compiler-synthesized lookup table, surfaced by the decompiler as a named class.
     *
     * <p>The array is indexed by {@code ContainersMonitorEventType.ordinal()} and holds a small case number
     * (1..3) for the three event types registered below; every other slot keeps the {@code int} default 0.
     * NOTE(review): presumably consumed by a {@code switch} over the event type elsewhere in the enclosing
     * class - that code is not visible here.
     */
    public static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$org$apache$hadoop$yarn$server$nodemanager$containermanager$monitor$ContainersMonitorEventType = new int[ContainersMonitorEventType.values().length];

        static {
            // Each registration is isolated in its own try block: if an enum constant is missing at link time
            // (NoSuchFieldError), only that slot is left at 0 and the remaining constants are still mapped.
            try {
                $SwitchMap$org$apache$hadoop$yarn$server$nodemanager$containermanager$monitor$ContainersMonitorEventType[ContainersMonitorEventType.START_MONITORING_CONTAINER.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$org$apache$hadoop$yarn$server$nodemanager$containermanager$monitor$ContainersMonitorEventType[ContainersMonitorEventType.STOP_MONITORING_CONTAINER.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                $SwitchMap$org$apache$hadoop$yarn$server$nodemanager$containermanager$monitor$ContainersMonitorEventType[ContainersMonitorEventType.CHANGE_MONITORING_CONTAINER_RESOURCE.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
        }
    }

    /* loaded from: input_file:org/apache/hadoop/yarn/server/nodemanager/containermanager/sharedresourcemonitor/SharedResourceContainersMonitorImpl$SharedResourceMonitoringThread.class */
    private class SharedResourceMonitoringThread extends ContainersMonitorImpl.MonitoringThread {
        /**
         * Creates the monitoring thread, handing the enclosing monitor instance to the
         * {@code ContainersMonitorImpl.MonitoringThread} super constructor.
         */
        SharedResourceMonitoringThread() {
            super(SharedResourceContainersMonitorImpl.this);
        }

        /**
         * Monitoring loop: runs until the enclosing monitor is stopped or this thread is interrupted.
         *
         * <p>Each pass:
         * <ol>
         *   <li>resets the per-pass kill-candidate and GPU accounting maps;</li>
         *   <li>for every tracked container, refreshes its process tree, reads virtual/physical/GPU memory and
         *       CPU usage (adding the usage of the container's docker process tree when one is returned by
         *       {@code monitorDocker}), records the usage and checks the per-container limits;</li>
         *   <li>applies the node-level physical and GPU memory thresholds;</li>
         *   <li>publishes the aggregated utilization and GPU health metrics;</li>
         *   <li>sleeps for the configured monitoring interval.</li>
         * </ol>
         * A failure while handling one container is logged and does not stop the pass.
         */
        @Override
        public void run() {
            final SharedResourceContainersMonitorImpl monitor = SharedResourceContainersMonitorImpl.this;
            while (!monitor.stopped && !Thread.currentThread().isInterrupted()) {
                debugLogPrint();
                ResourceUtilization trackedUtilization = ResourceUtilization.newInstance(0, 0, 0.0f);
                long vmemUsedByAllContainers = 0;
                long pmemUsedByAllContainers = 0;
                long cpuPercentByAllContainers = 0;
                gmemUsageCheckClean();
                for (Map.Entry<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> entry : monitor.trackingContainers.entrySet()) {
                    ContainerId containerId = entry.getKey();
                    ContainersMonitorImpl.ProcessTreeInfo ptInfo = entry.getValue();
                    try {
                        initializeProcessTrees(entry);
                        monitor.recordMetricsTags(ptInfo, containerId);
                        String pid = ptInfo.getPID();
                        if (pid == null || !monitor.isResourceCalculatorAvailable()) {
                            continue;
                        }
                        SharedResourceContainersMonitorImpl.LOG.debug("Constructing ProcessTree for : PID = {} ContainerId = {}", pid, containerId);
                        ResourceCalculatorProcessTree processTree = ptInfo.getProcessTree();
                        processTree.updateProcessTree();

                        // Current usage of the container's own process tree.
                        long vmemUsage = processTree.getVirtualMemorySize();
                        long pmemUsage = processTree.getRssMemorySize();
                        SortedMap<Integer, GpuMemoryStat> gpuMemoryStat = monitor.gpuAPI.getGpuMemoryStat();
                        HashMap<Integer, Long> gmemUsageByDevice = new HashMap<>();
                        Map<Integer, Float> gpuShareRatio = ((SharedResourceProcessTreeInfo) ptInfo).getGpuShareRatio();
                        ProcfsBasedProcessTreeWithSharedResource sharedTree = (ProcfsBasedProcessTreeWithSharedResource) processTree;
                        long gmemUsage = sharedTree.getGpuMemorySize(gpuMemoryStat, gpuShareRatio, gmemUsageByDevice);
                        float cpuUsagePercent = processTree.getCpuUsagePercent();
                        if (cpuUsagePercent < 0.0f) {
                            // A negative CPU reading means the calculator has no sample yet.
                            SharedResourceContainersMonitorImpl.LOG.info("Skipping monitoring container {} since CPU usage is not yet available.", containerId);
                            continue;
                        }
                        float cpuPercentOfAllCores = cpuUsagePercent / monitor.resourceCalculatorPlugin.getNumProcessors();
                        int milliVcoresUsed = (int) (((cpuPercentOfAllCores * 1000.0f) * ((float) monitor.maxVCoresAllottedForContainers)) / monitor.nodeCpuPercentageForYARN);

                        // The "(1)" variants are the usage figures handed to checkLimit below.
                        long vmemUsageForLimit = processTree.getVirtualMemorySize(1);
                        long pmemUsageForLimit = processTree.getRssMemorySize(1);
                        HashMap<Integer, Long> gmemUsageForLimit = new HashMap<>();
                        sharedTree.getGpuMemorySize(1, gpuMemoryStat, gpuShareRatio, gmemUsageForLimit);

                        ResourceCalculatorProcessTree dockerTree = monitorDocker(containerId, gpuMemoryStat);
                        if (dockerTree != null) {
                            // Fold the docker process tree into the container's figures; negative readings count as 0.
                            vmemUsage += Math.max(dockerTree.getVirtualMemorySize(), 0L);
                            pmemUsage += Math.max(dockerTree.getRssMemorySize(), 0L);
                            vmemUsageForLimit += Math.max(dockerTree.getVirtualMemorySize(1), 0L);
                            pmemUsageForLimit += Math.max(dockerTree.getRssMemorySize(1), 0L);
                            float dockerCpuPercent = dockerTree.getCpuUsagePercent();
                            cpuUsagePercent += dockerCpuPercent > 0.0f ? dockerCpuPercent : 0.0f;
                            cpuPercentOfAllCores = cpuUsagePercent / monitor.resourceCalculatorPlugin.getNumProcessors();
                            milliVcoresUsed = (int) (((cpuPercentOfAllCores * 1000.0f) * ((float) monitor.maxVCoresAllottedForContainers)) / monitor.nodeCpuPercentageForYARN);

                            ProcfsBasedProcessTreeWithSharedResource sharedDockerTree = (ProcfsBasedProcessTreeWithSharedResource) dockerTree;
                            HashMap<Integer, Long> dockerGmemByDevice = new HashMap<>();
                            long dockerGmemUsage = sharedDockerTree.getGpuMemorySize(gpuMemoryStat, null, dockerGmemByDevice);
                            gmemUsage += Math.max(dockerGmemUsage, 0L);
                            SharedResourceContainersMonitorImpl.LOG.debug("Total Gpu memory usage: {}", Long.valueOf(gmemUsage));
                            mergeCurrentGmemUsage(dockerGmemByDevice, gmemUsageByDevice);
                            SharedResourceContainersMonitorImpl.LOG.debug("Gpu memory usage:{}", gmemUsageByDevice);
                            HashMap<Integer, Long> dockerGmemForLimit = new HashMap<>();
                            sharedDockerTree.getGpuMemorySize(1, gpuMemoryStat, null, dockerGmemForLimit);
                            mergeCurrentGmemUsage(dockerGmemForLimit, gmemUsageForLimit);
                        }

                        ((SharedResourceProcessTreeInfo) ptInfo).updateResourceUsage(pmemUsage, vmemUsage, gmemUsageByDevice);
                        recordUsage(containerId, ptInfo, gmemUsage, cpuUsagePercent, cpuPercentOfAllCores, milliVcoresUsed, trackedUtilization);
                        checkLimit(containerId, ptInfo, vmemUsageForLimit, pmemUsageForLimit, gmemUsageForLimit);
                        vmemUsedByAllContainers += vmemUsage;
                        pmemUsedByAllContainers += pmemUsage;
                        // Compound assignment carries the implicit float -> long narrowing; a plain
                        // "x = (float) x + y" assignment to a long does not compile.
                        cpuPercentByAllContainers += cpuUsagePercent;
                        updateGmemUsageByAllContainers(gmemUsageByDevice);
                        reportResourceUsage(containerId, pmemUsage, cpuUsagePercent);
                    } catch (Exception e) {
                        // Keep monitoring the remaining containers even if this one failed.
                        SharedResourceContainersMonitorImpl.LOG.warn("Uncaught exception in SharedResourceContainersMonitorImpl while monitoring resource of " + containerId, e);
                    }
                }
                if (SharedResourceContainersMonitorImpl.LOG.isDebugEnabled()) {
                    SharedResourceContainersMonitorImpl.LOG.debug("Total Resource Usage stats in NM by all containers : Virtual Memory= {}, Physical Memory= {}, Total CPU usage(% per core)= {}", new Object[]{Long.valueOf(vmemUsedByAllContainers), Long.valueOf(pmemUsedByAllContainers), Long.valueOf(cpuPercentByAllContainers)});
                }
                checkNodeResourceThreshold(pmemUsedByAllContainers);
                monitor.setContainersUtilization(trackedUtilization);
                NodeManagerMetrics nmMetrics = monitor.context.getNodeManagerMetrics();
                if (nmMetrics != null) {
                    nmMetrics.setContainerUsedMemGB(trackedUtilization.getPhysicalMemory());
                    nmMetrics.setContainerUsedVMemGB(trackedUtilization.getVirtualMemory());
                    nmMetrics.setContainerCpuUtilization(trackedUtilization.getCPU());
                }
                recordNodeMetrics();
                try {
                    Thread.sleep(monitor.monitoringInterval);
                } catch (InterruptedException e) {
                    SharedResourceContainersMonitorImpl.LOG.warn("{} is interrupted. Exiting.", SharedResourceContainersMonitorImpl.class.getName());
                    // Preserve the interrupt status for anything that inspects this thread afterwards.
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }

        /**
         * Adds every per-device value of {@code map} into {@code map2}.
         *
         * @param map  values to add (left unchanged)
         * @param map2 accumulator; a key that is absent (or mapped to {@code null}) receives the value from
         *             {@code map} as is, otherwise the two values are summed
         */
        private void mergeCurrentGmemUsage(Map<Integer, Long> map, Map<Integer, Long> map2) {
            for (Map.Entry<Integer, Long> addition : map.entrySet()) {
                Integer device = addition.getKey();
                Long accumulated = map2.get(device);
                if (accumulated == null) {
                    map2.put(device, addition.getValue());
                } else {
                    map2.put(device, Long.valueOf(accumulated.longValue() + addition.getValue().longValue()));
                }
            }
        }

        /**
         * When debug logging is enabled, logs the PIDs of all tracked container process trees followed by the
         * PIDs of all tracked docker container process trees. Does nothing otherwise.
         */
        private void debugLogPrint() {
            if (!SharedResourceContainersMonitorImpl.LOG.isDebugEnabled()) {
                return;
            }
            StringBuilder containerPids = new StringBuilder("[ ");
            for (ContainersMonitorImpl.ProcessTreeInfo tracked : SharedResourceContainersMonitorImpl.this.trackingContainers.values()) {
                containerPids.append(tracked.getPID()).append(" ");
            }
            // The closing bracket is part of the log format string.
            SharedResourceContainersMonitorImpl.LOG.debug("Current ProcessTree list : {}]", containerPids.toString());

            StringBuilder dockerPids = new StringBuilder("[ ");
            for (ContainersMonitorImpl.ProcessTreeInfo tracked : SharedResourceContainersMonitorImpl.this.trackingDockerContainers.values()) {
                dockerPids.append(tracked.getPID()).append(" ");
            }
            SharedResourceContainersMonitorImpl.LOG.debug("Current docker container ProcessTree list : {}]", dockerPids.toString());
        }

        /**
         * Resets the per-pass bookkeeping: empties the physical-memory kill candidates, empties the GPU
         * kill candidates of every device, and sets the accounted GPU usage of each of those devices to 0.
         */
        private void gmemUsageCheckClean() {
            final SharedResourceContainersMonitorImpl monitor = SharedResourceContainersMonitorImpl.this;
            monitor.pmemOverLimitedContainers.clear();
            for (Map.Entry<Integer, Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo>> perDevice : monitor.gmemOverLimitedContainers.entrySet()) {
                Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> candidates = perDevice.getValue();
                candidates.clear();
                monitor.gmemStillInUsage.put(perDevice.getKey(), 0L);
            }
        }

        /**
         * Adds one container's per-device GPU memory usage to the node-wide per-device accounting.
         *
         * <p>A device index with no accounting entry yet is treated as currently using 0 bytes instead of
         * failing on a {@code null} unboxing, so usage reported for such a device no longer aborts the
         * handling of the container.
         *
         * @param map GPU memory used by one container, keyed by GPU device index
         */
        private void updateGmemUsageByAllContainers(Map<Integer, Long> map) {
            Map<Integer, Long> accounted = SharedResourceContainersMonitorImpl.this.gmemStillInUsage;
            for (Map.Entry<Integer, Long> usage : map.entrySet()) {
                Long previous = accounted.get(usage.getKey());
                long base = previous == null ? 0L : previous.longValue();
                accounted.put(usage.getKey(), Long.valueOf(base + usage.getValue().longValue()));
            }
        }

        /**
         * Publishes the health of every GPU device to the node metrics.
         *
         * <p>The metrics call receives a fixed 32-slot array indexed by device index; slots without a
         * reported device hold {@code GpuHealth.HEALTH_BUTT}. A device index outside the array is logged and
         * skipped rather than thrown: this method is called from the monitoring loop outside its per-container
         * exception handling, so an {@code ArrayIndexOutOfBoundsException} here would end the thread.
         */
        private void recordNodeMetrics() {
            final SharedGpuAPI gpuApi = SharedResourceContainersMonitorImpl.this.gpuAPI;
            int[] deviceIndexList = gpuApi.getDeviceIndexList();
            int[] healthByDevice = new int[32];
            int unknownHealth = GpuHealth.HEALTH_BUTT.value();
            for (int slot = 0; slot < healthByDevice.length; slot++) {
                healthByDevice[slot] = unknownHealth;
            }
            if (deviceIndexList != null) {
                for (int deviceIndex : deviceIndexList) {
                    if (deviceIndex < 0 || deviceIndex >= healthByDevice.length) {
                        SharedResourceContainersMonitorImpl.LOG.warn("Ignoring GPU device index {} outside the supported range [0, {}).", Integer.valueOf(deviceIndex), Integer.valueOf(healthByDevice.length));
                        continue;
                    }
                    healthByDevice[deviceIndex] = gpuApi.getHealth(deviceIndex).value();
                }
            }
            SharedResourceContainersMonitorImpl.this.nodeMetrics.recordNodeMetrics(healthByDevice);
        }

        /**
         * Selects containers to kill when the node's physical memory usage exceeds the usable threshold.
         *
         * @param j physical memory currently used by all monitored containers
         * @return the selected containers; empty when there are no over-limit candidates, the physical
         *         memory check is disabled, or usage does not exceed the threshold
         */
        private Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> killPmemOverThreshold(long j) {
            final SharedResourceContainersMonitorImpl monitor = SharedResourceContainersMonitorImpl.this;
            if (monitor.pmemOverLimitedContainers.size() <= 0 || !monitor.isPmemCheckEnabled()) {
                return new HashMap<>();
            }
            long overLimitMem = j - monitor.usableMemoryThreshold;
            if (SharedResourceContainersMonitorImpl.LOG.isDebugEnabled()) {
                SharedResourceContainersMonitorImpl.LOG.debug("total used:{}, all:{}, overLimitMem:{}", new Object[]{Long.valueOf(j), Long.valueOf(monitor.getPmemAllocatedForContainers()), Long.valueOf(overLimitMem)});
            }
            if (overLimitMem <= 0) {
                return new HashMap<>();
            }
            SharedResourceContainersMonitorImpl.LOG.warn("Total used memory on this node: {}, all memory: {}, usabel memory: {}.The configured memory threshold have been reached, and overused {}B physical memory. Node Manager will kill a few containers as physical memory consumption rate.", new Object[]{Long.valueOf(j), Long.valueOf(monitor.getPmemAllocatedForContainers()), Long.valueOf(monitor.usableMemoryThreshold), Long.valueOf(overLimitMem)});
            return killMaxOverLimitContainer(overLimitMem, monitor.pmemOverLimitedContainers);
        }

        /**
         * Selects containers to kill for every GPU device whose accounted memory usage exceeds its usable
         * threshold.
         *
         * <p>Before a device is evaluated, the GPU memory of containers that were already selected for
         * killing (first those passed in, later those picked for a previous device) is subtracted from the
         * per-device accounting, because that memory is about to be released. Each selected set is subtracted
         * exactly once; previously the same set was subtracted again for every following device that did not
         * itself produce a new selection, which understated usage and could suppress required kills.
         *
         * <p>Devices without a GPU statistic, accounting entry or threshold are skipped with a warning
         * instead of failing on a {@code null} unboxing.
         *
         * @param map containers already selected for killing by the physical memory check
         * @return containers selected for killing because of GPU memory; empty when there are no GPU
         *         candidates or the GPU memory check is disabled
         */
        private Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> killGmemOverThreshold(Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> map) {
            final SharedResourceContainersMonitorImpl monitor = SharedResourceContainersMonitorImpl.this;
            Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> selected = new HashMap<>();
            if (monitor.gmemOverLimitedContainers.size() <= 0 || !monitor.isGpuMemCheckEnabled()) {
                return selected;
            }
            SortedMap<Integer, GpuMemoryStat> gpuMemoryStat = monitor.gpuAPI.getGpuMemoryStat();
            // Containers selected for killing whose GPU memory has not been subtracted yet.
            Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> pendingRelease = map;
            for (Map.Entry<Integer, Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo>> deviceEntry : monitor.gmemOverLimitedContainers.entrySet()) {
                if (pendingRelease != null) {
                    for (ContainersMonitorImpl.ProcessTreeInfo doomed : pendingRelease.values()) {
                        Map<Integer, Float> gpuShareRatio = ((SharedResourceProcessTreeInfo) doomed).getGpuShareRatio();
                        HashMap<Integer, Long> doomedUsage = new HashMap<>();
                        ((ProcfsBasedProcessTreeWithSharedResource) doomed.getProcessTree()).getGpuMemorySize(gpuMemoryStat, gpuShareRatio, doomedUsage);
                        for (Map.Entry<Integer, Long> usage : doomedUsage.entrySet()) {
                            Long accounted = monitor.gmemStillInUsage.get(usage.getKey());
                            if (accounted == null) {
                                // Nothing is accounted for this device, so there is nothing to subtract.
                                continue;
                            }
                            long remaining = accounted.longValue() - usage.getValue().longValue();
                            monitor.gmemStillInUsage.put(usage.getKey(), Long.valueOf(remaining > 0 ? remaining : 0L));
                        }
                    }
                    // Subtracted once; must not be subtracted again for the next device.
                    pendingRelease = null;
                }

                Integer device = deviceEntry.getKey();
                GpuMemoryStat deviceStat = gpuMemoryStat.get(device);
                if (deviceStat == null) {
                    SharedResourceContainersMonitorImpl.LOG.warn("Unable to get current GPU {} usage.", device);
                    continue;
                }
                Long inUse = monitor.gmemStillInUsage.get(device);
                Long usable = monitor.usableGpuMemoryThreshold.get(device);
                if (inUse == null || usable == null) {
                    SharedResourceContainersMonitorImpl.LOG.warn("No GPU memory accounting or usable threshold is available for device {}; skipping its threshold check.", device);
                    continue;
                }
                long overLimitMem = inUse.longValue() - usable.longValue();
                if (SharedResourceContainersMonitorImpl.LOG.isDebugEnabled()) {
                    SharedResourceContainersMonitorImpl.LOG.debug("Total gpu memory used for device {}: {}, all:{}, overLimitMem:{}, usableMem:{}", new Object[]{device, inUse, deviceStat.getTotalMem(), Long.valueOf(overLimitMem), usable});
                }
                if (overLimitMem > 0) {
                    SharedResourceContainersMonitorImpl.LOG.warn("Total gpu memory used for device {}: {}, all:{}, overLimitMem:{}, usableMem:{}The configured gpu memory threshold on this node have been reached, and overused {}B gpu memory for device {}. Node Manager will kill a few containers as gpu memory consumption rate.", new Object[]{device, inUse, deviceStat.getTotalMem(), Long.valueOf(overLimitMem), usable, Long.valueOf(overLimitMem), device});
                    pendingRelease = killMaxOverLimitContainer(overLimitMem, deviceEntry.getValue(), device.intValue());
                    selected.putAll(pendingRelease);
                }
            }
            return selected;
        }

        /**
         * Applies the node-level physical and GPU memory thresholds and kills the containers they select.
         *
         * <p>Both selections are computed before any container is killed; the GPU selection is given the
         * physical-memory selection as input.
         *
         * @param j physical memory currently used by all monitored containers
         */
        private void checkNodeResourceThreshold(long j) {
            Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> pmemVictims = killPmemOverThreshold(j);
            Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> gmemVictims = killGmemOverThreshold(pmemVictims);

            // -104 / -201 are the exit statuses reported with the kill event for each resource kind.
            killSelectedContainers(pmemVictims, -104, "physical");
            killSelectedContainers(gmemVictims, -201, "gpu");
        }

        /**
         * Sends a kill event for every given container and stops tracking it.
         *
         * @param map containers to kill, with their process-tree info
         * @param i   exit status placed in the kill event
         * @param str resource kind passed to the error-message formatter (e.g. "physical", "gpu")
         */
        private void killSelectedContainers(Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> map, int i, String str) {
            final SharedResourceContainersMonitorImpl monitor = SharedResourceContainersMonitorImpl.this;
            for (Map.Entry<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> victim : map.entrySet()) {
                ContainerId containerId = victim.getKey();
                SharedResourceProcessTreeInfo info = (SharedResourceProcessTreeInfo) victim.getValue();
                ResourceCalculatorProcessTree tree = info.getProcessTree();

                String diagnostics = formatErrorMessage(
                        str,
                        tree.getVirtualMemorySize(),
                        info.getVmemLimit(),
                        info.pmemOverallCurrentUsage,
                        info.getPmemLimit(),
                        info.gpuOverallCurrentUsage,
                        info.gpuLimit,
                        info.getPID(),
                        containerId,
                        tree);
                SharedResourceContainersMonitorImpl.LOG.warn(diagnostics);

                boolean isGroupLeader = tree.checkPidPgrpidForMatch();
                if (!isGroupLeader) {
                    SharedResourceContainersMonitorImpl.LOG.error("Killed container process with PID " + info.getPID() + " but it is not a process group leader.");
                }

                monitor.eventDispatcher.getEventHandler().handle(new ContainerKillEvent(containerId, i, diagnostics));
                monitor.trackingContainers.remove(containerId);
                SharedResourceContainersMonitorImpl.LOG.info("Removed ProcessTree with root {}", info.getPID());
            }
        }

        /**
         * Compares one candidate container against the current "worst offender" for a GPU device and
         * updates {@code maxOverLimitContainer} when the candidate exceeds its GPU limit by a larger ratio.
         *
         * <p>A missing usage or limit map, or a missing entry for the device, counts as 0. The lookups avoid
         * a conditional expression mixing {@code Long} and {@code long}: such an expression has type
         * {@code long}, so a missing map entry would be unboxed and throw {@code NullPointerException}
         * before the null fallback could apply.
         *
         * @param i                     GPU device index
         * @param entry                 candidate container and its process-tree info
         * @param maxOverLimitContainer holder of the worst offender found so far; updated in place
         * @return {@code true} only when the candidate uses GPU memory while its limit is 0 (infinite
         *         over-limit ratio), which tells the caller to stop searching; {@code false} otherwise,
         *         including when the holder was updated with a finite ratio
         */
        private boolean findMaxOverLimitContainer(int i, Map.Entry<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> entry, MaxOverLimitContainer maxOverLimitContainer) {
            ContainerId containerId = entry.getKey();
            SharedResourceProcessTreeInfo info = (SharedResourceProcessTreeInfo) entry.getValue();
            Integer device = Integer.valueOf(i);

            Map<Integer, Long> usageByDevice = info.gpuOverallCurrentUsage;
            Long boxedUsage = usageByDevice == null ? null : usageByDevice.get(device);
            long usage = boxedUsage == null ? 0L : boxedUsage.longValue();

            Map<Integer, Long> limitByDevice = info.gpuLimit;
            Long boxedLimit = limitByDevice == null ? null : limitByDevice.get(device);
            long limit = boxedLimit == null ? 0L : boxedLimit.longValue();

            if (usage <= limit) {
                return false;
            }
            long overLimit = usage - limit;
            if (limit == 0) {
                // No limit at all but memory in use: the ratio is infinite, nothing can beat it.
                maxOverLimitContainer.setMaxContainer(containerId);
                maxOverLimitContainer.setOverLimit(overLimit);
                SharedResourceContainersMonitorImpl.LOG.debug("[GPU]: {} ratio: INF", containerId);
                return true;
            }
            float ratio = ((float) overLimit) / ((float) limit);
            SharedResourceContainersMonitorImpl.LOG.debug("{}: ratio:{}, maxRatio:{}", new Object[]{containerId, Float.valueOf(ratio), Float.valueOf(maxOverLimitContainer.getMaxRatio())});
            if (ratio > maxOverLimitContainer.getMaxRatio()) {
                maxOverLimitContainer.setMaxRatio(ratio);
                maxOverLimitContainer.setMaxContainer(containerId);
                maxOverLimitContainer.setOverLimit(overLimit);
            }
            // A finite ratio never ends the search: a later candidate may still be worse.
            return false;
        }

        /**
         * Picks the containers with the worst GPU over-limit ratio on one device until the memory they
         * exceed their limits by covers the requested amount, or no over-limit container is left.
         *
         * <p>Every pick except the last one is removed from {@code map} so it is not picked again.
         *
         * @param j   amount of GPU memory that has to be reclaimed
         * @param map candidate containers for the device; modified as described above
         * @param i   GPU device index
         * @return the picked containers (possibly empty)
         */
        private Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> killMaxOverLimitContainer(long j, Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> map, int i) {
            Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> picked = new HashMap<>();
            MaxOverLimitContainer worst = new MaxOverLimitContainer();
            long reclaimed = 0;
            for (;;) {
                worst.clean();
                for (Map.Entry<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> candidate : map.entrySet()) {
                    // A true result means an unbeatable candidate was found; stop scanning.
                    if (findMaxOverLimitContainer(i, candidate, worst)) {
                        break;
                    }
                }
                ContainerId victim = worst.getMaxContainer();
                if (victim == null) {
                    break;
                }
                picked.put(victim, map.get(victim));
                reclaimed += worst.getOverLimit();
                if (reclaimed >= j) {
                    break;
                }
                map.remove(victim);
            }
            if (picked.size() < 1) {
                SharedResourceContainersMonitorImpl.LOG.warn("Could not find over limited container in gpu {}.", Integer.valueOf(i));
            }
            return picked;
        }

        /**
         * Picks the containers with the worst physical-memory over-limit ratio until the memory they exceed
         * their limits by covers the requested amount, or no over-limit container is left.
         *
         * <p>Every pick except the last one is removed from {@code map} so it is not picked again.
         *
         * @param j   amount of physical memory that has to be reclaimed
         * @param map candidate containers; modified as described above
         * @return the picked containers (possibly empty)
         */
        private Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> killMaxOverLimitContainer(long j, Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> map) {
            Map<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> picked = new HashMap<>();
            long reclaimed = 0;
            for (;;) {
                ContainerId victim = null;
                float worstRatio = 0.0f;
                long victimOverLimit = 0;
                for (Map.Entry<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> candidate : map.entrySet()) {
                    ContainerId candidateId = candidate.getKey();
                    ContainersMonitorImpl.ProcessTreeInfo candidateInfo = candidate.getValue();
                    long usage = ((SharedResourceProcessTreeInfo) candidateInfo).pmemOverallCurrentUsage;
                    long limit = candidateInfo.getPmemLimit();
                    if (usage <= limit) {
                        continue;
                    }
                    float ratio = ((float) (usage - limit)) / ((float) limit);
                    SharedResourceContainersMonitorImpl.LOG.debug("{}: ratio:{}, maxRatio:{}", new Object[]{candidateId, Float.valueOf(ratio), Float.valueOf(worstRatio)});
                    if (ratio > worstRatio) {
                        worstRatio = ratio;
                        victim = candidateId;
                        victimOverLimit = usage - limit;
                    }
                }
                if (victim == null) {
                    break;
                }
                picked.put(victim, map.get(victim));
                reclaimed += victimOverLimit;
                if (reclaimed >= j) {
                    break;
                }
                map.remove(victim);
            }
            if (picked.size() < 1) {
                SharedResourceContainersMonitorImpl.LOG.warn("Could not find over limited container.");
            }
            return picked;
        }

        /**
         * Records a container's memory, CPU and GPU memory usage in its container metrics; does nothing when
         * container metrics are disabled. Memory values are recorded after a right shift by 20 bits.
         *
         * @param containerId     container the usage belongs to
         * @param processTreeInfo the container's process-tree info (source of physical and per-device GPU usage)
         * @param j               total GPU memory used by the container
         * @param f               CPU usage percentage
         * @param i               used milli-vcores
         */
        private void recordUsageMetrics(ContainerId containerId, ContainersMonitorImpl.ProcessTreeInfo processTreeInfo, long j, float f, int i) {
            final SharedResourceContainersMonitorImpl monitor = SharedResourceContainersMonitorImpl.this;
            if (!monitor.containerMetricsEnabled) {
                return;
            }
            SharedResourceProcessTreeInfo info = (SharedResourceProcessTreeInfo) processTreeInfo;
            long pmemUsage = info.getPmemOverallCurrentUsage();
            Map<Integer, Long> gmemUsageByDevice = info.getGpuOverallCurrentUsage();

            int pmemShifted = (int) (pmemUsage >> 20);
            ContainerMetricsWithSharedResource.forContainer(containerId, monitor.containerMetricsPeriodMs, monitor.containerMetricsUnregisterDelayMs).recordMemoryUsage(pmemShifted);
            ContainerMetricsWithSharedResource.forContainer(containerId, monitor.containerMetricsPeriodMs, monitor.containerMetricsUnregisterDelayMs).recordCpuUsage((int) f, i);
            int gmemShifted = (int) (j >> 20);
            ContainerMetricsWithSharedResource.forContainer(containerId, monitor.containerMetricsPeriodMs, monitor.containerMetricsUnregisterDelayMs).recordGPUMemoryUsage(gmemShifted);
            for (Map.Entry<Integer, Long> perDevice : gmemUsageByDevice.entrySet()) {
                int device = perDevice.getKey().intValue();
                int deviceShifted = (int) (perDevice.getValue().longValue() >> 20);
                ContainerMetricsWithSharedResource.forContainer(containerId, monitor.containerMetricsPeriodMs, monitor.containerMetricsUnregisterDelayMs).recordGPUMemoryUsage(device, deviceShifted);
            }
        }

        /**
         * Records one container's resource usage: writes an audit debug line, adds the usage to the
         * node-wide utilization accumulator and forwards it to the container metrics.
         *
         * @param containerId         container the usage belongs to
         * @param processTreeInfo     the container's process-tree info
         * @param j                   total GPU memory used by the container
         * @param f                   CPU usage percentage
         * @param f2                  CPU usage percentage per core
         * @param i                   used milli-vcores
         * @param resourceUtilization accumulator for the utilization of all containers; updated in place
         */
        private void recordUsage(ContainerId containerId, ContainersMonitorImpl.ProcessTreeInfo processTreeInfo, long j, float f, float f2, int i, ResourceUtilization resourceUtilization) {
            long vmemLimit = processTreeInfo.getVmemLimit();
            long pmemLimit = processTreeInfo.getPmemLimit();
            SortedMap<Integer, GpuMemoryStat> gpuMemoryStat = SharedResourceContainersMonitorImpl.this.gpuAPI.getGpuMemoryStat();
            SharedResourceProcessTreeInfo info = (SharedResourceProcessTreeInfo) processTreeInfo;
            // Queried unconditionally, not only when the audit line is written.
            Map<Integer, Long> gmemLimitByDevice = info.getGpuLimit(gpuMemoryStat);
            long vmemUsage = info.getVmemOverallCurrentUsage();
            long pmemUsage = info.getPmemOverallCurrentUsage();
            Map<Integer, Long> gmemUsageByDevice = info.getGpuOverallCurrentUsage();

            if (SharedResourceContainersMonitorImpl.AUDITLOG.isDebugEnabled()) {
                String usageSummary = formatUsageString(vmemUsage, vmemLimit, pmemUsage, pmemLimit, gmemUsageByDevice, gmemLimitByDevice);
                String auditLine = String.format(Locale.ROOT, "Resource usage of ProcessTree %s for container-id %s: %s CPU:%f CPU/core:%f", processTreeInfo.getPID(), containerId.toString(), usageSummary, Float.valueOf(f), Float.valueOf(f2));
                SharedResourceContainersMonitorImpl.AUDITLOG.debug(auditLine);
            }

            int pmemShifted = (int) (pmemUsage >> 20);
            int vmemShifted = (int) (vmemUsage >> 20);
            resourceUtilization.addTo(pmemShifted, vmemShifted, i / 1000.0f);
            recordUsageMetrics(containerId, processTreeInfo, j, f, i);
        }

        /**
         * Requests the kill of a container and stops tracking it.
         *
         * <p>The diagnostic message is logged at WARN, a {@link ContainerKillEvent} is dispatched
         * and the container is removed from {@code trackingContainers}.
         *
         * @param containerId container to kill
         * @param processTreeInfo tracking record supplying the PID and process tree
         * @param str diagnostic message, logged and attached to the kill event
         * @param i exit status attached to the kill event
         */
        private void killContainer(ContainerId containerId, ContainersMonitorImpl.ProcessTreeInfo processTreeInfo, String str, int i) {
            final String rootPid = processTreeInfo.getPID();
            final ResourceCalculatorProcessTree tree = processTreeInfo.getProcessTree();

            SharedResourceContainersMonitorImpl.LOG.warn(str);
            boolean isGroupLeader = tree.checkPidPgrpidForMatch();
            if (!isGroupLeader) {
                SharedResourceContainersMonitorImpl.LOG.error("Killed container process with PID {} but it is not a process group leader.", rootPid);
            }

            SharedResourceContainersMonitorImpl.this.eventDispatcher
                    .getEventHandler()
                    .handle(new ContainerKillEvent(containerId, i, str));
            SharedResourceContainersMonitorImpl.this.trackingContainers.remove(containerId);
            SharedResourceContainersMonitorImpl.LOG.info("Removed ProcessTree with root {}", rootPid);
        }

        /**
         * Kills a container that exceeded its virtual memory limit, using a diagnostic message
         * that reports current virtual and physical usage against their limits.
         *
         * @param containerId container to kill
         * @param processTreeInfo tracking record; must be a {@link SharedResourceProcessTreeInfo}
         */
        private void killWhenVmemOverLimit(ContainerId containerId, ContainersMonitorImpl.ProcessTreeInfo processTreeInfo) {
            final String rootPid = processTreeInfo.getPID();
            final ResourceCalculatorProcessTree tree = processTreeInfo.getProcessTree();
            final long vmemCap = processTreeInfo.getVmemLimit();
            final long pmemCap = processTreeInfo.getPmemLimit();
            final SharedResourceProcessTreeInfo sharedInfo = (SharedResourceProcessTreeInfo) processTreeInfo;
            final long vmemUsed = sharedInfo.getVmemOverallCurrentUsage();

            String usageSummary = formatUsageString(vmemUsed, vmemCap, sharedInfo.getPmemOverallCurrentUsage(), pmemCap);
            long excess = vmemUsed - vmemCap;
            String diagnostics = formatErrorMessage("virtual", usageSummary, rootPid, containerId, tree, excess);
            // -103 is the value of YARN's ContainerExitStatus.KILLED_EXCEEDED_VMEM.
            killContainer(containerId, processTreeInfo, diagnostics, -103);
        }

        /**
         * Handles a container that exceeded its physical memory limit.
         *
         * <p>When a positive usable-memory threshold is configured the container is only queued in
         * {@code pmemOverLimitedContainers}; otherwise it is killed immediately.
         *
         * @param containerId container over its physical memory limit
         * @param processTreeInfo tracking record; must be a {@link SharedResourceProcessTreeInfo}
         * @return {@code true} if the container was killed, {@code false} if it was only queued
         */
        private boolean checkKillWhenPmemOverLimit(ContainerId containerId, ContainersMonitorImpl.ProcessTreeInfo processTreeInfo) {
            final String rootPid = processTreeInfo.getPID();
            final ResourceCalculatorProcessTree tree = processTreeInfo.getProcessTree();
            final long vmemCap = processTreeInfo.getVmemLimit();
            final long pmemCap = processTreeInfo.getPmemLimit();
            final SharedResourceProcessTreeInfo sharedInfo = (SharedResourceProcessTreeInfo) processTreeInfo;
            final long vmemUsed = sharedInfo.getVmemOverallCurrentUsage();
            final long pmemUsed = sharedInfo.getPmemOverallCurrentUsage();

            if (SharedResourceContainersMonitorImpl.this.usableMemoryThreshold > 0) {
                // Threshold mode: remember the offender instead of killing it right away.
                SharedResourceContainersMonitorImpl.LOG.info("PMEM OVERLIMIT container: {}", containerId);
                SharedResourceContainersMonitorImpl.this.pmemOverLimitedContainers.put(containerId, processTreeInfo);
                return false;
            }

            String usageSummary = formatUsageString(vmemUsed, vmemCap, pmemUsed, pmemCap);
            String diagnostics = formatErrorMessage("physical", usageSummary, rootPid, containerId, tree, pmemUsed - pmemCap);
            // -104 is the value of YARN's ContainerExitStatus.KILLED_EXCEEDED_PMEM.
            killContainer(containerId, processTreeInfo, diagnostics, -104);
            return true;
        }

        /**
         * Checks the container's per-GPU memory usage against its per-GPU limits and reacts to the
         * first GPU found over limit.
         *
         * <p>Without a positive usable-memory threshold the container is killed with exit status
         * {@code -201}; otherwise it is queued in {@code gmemOverLimitedContainers} under the
         * offending GPU index.
         *
         * @param containerId container being checked
         * @param processTreeInfo tracking record; must be a {@link SharedResourceProcessTreeInfo}
         * @param map second usage map handed to the over-limit check (presumably the usage of
         *            aged processes, mirroring the memory check - confirm against the caller)
         */
        private void checkKillWhenGmemOverLimit(ContainerId containerId, ContainersMonitorImpl.ProcessTreeInfo processTreeInfo, Map<Integer, Long> map) {
            final SharedResourceProcessTreeInfo sharedInfo = (SharedResourceProcessTreeInfo) processTreeInfo;
            final Map<Integer, Long> gpuUsed = sharedInfo.getGpuOverallCurrentUsage();
            final Map<Integer, Long> gpuCaps = sharedInfo.getGpuLimit(SharedResourceContainersMonitorImpl.this.gpuAPI.getGpuMemoryStat());
            final int overLimitGpu = SharedResourceContainersMonitorImpl.this.isProcessTreeOverLimit(containerId.toString(), gpuUsed, map, gpuCaps);

            // A negative index means no GPU is over its limit.
            if (overLimitGpu >= 0) {
                if (SharedResourceContainersMonitorImpl.this.usableMemoryThreshold > 0) {
                    SharedResourceContainersMonitorImpl.LOG.info("GPU memory OVERLIMIT container: {}", containerId);
                    // NOTE(review): assumes a queue was registered for this GPU index during
                    // serviceInit; a GPU that was not listed then would yield null here.
                    ((Map) SharedResourceContainersMonitorImpl.this.gmemOverLimitedContainers.get(Integer.valueOf(overLimitGpu))).put(containerId, processTreeInfo);
                } else {
                    killContainer(containerId, processTreeInfo, "gpu over limit", -201);
                }
            }
        }

        /**
         * Runs the virtual-memory, physical-memory and GPU-memory limit checks for a container, in
         * that order.
         *
         * <p>A virtual-memory violation kills the container and ends the check. The GPU check is
         * skipped only when the physical-memory path actually killed the container.
         *
         * @param containerId container being checked
         * @param processTreeInfo tracking record; must be a {@link SharedResourceProcessTreeInfo}
         * @param j third argument of the virtual-memory over-limit check
         * @param j2 third argument of the physical-memory over-limit check
         * @param map usage map forwarded to the GPU-memory check
         */
        private void checkLimit(ContainerId containerId, ContainersMonitorImpl.ProcessTreeInfo processTreeInfo, long j, long j2, Map<Integer, Long> map) {
            final long vmemCap = processTreeInfo.getVmemLimit();
            final long pmemCap = processTreeInfo.getPmemLimit();
            final SharedResourceProcessTreeInfo sharedInfo = (SharedResourceProcessTreeInfo) processTreeInfo;
            final long vmemUsed = sharedInfo.getVmemOverallCurrentUsage();
            final long pmemUsed = sharedInfo.getPmemOverallCurrentUsage();

            if (SharedResourceContainersMonitorImpl.this.isVmemCheckEnabled()
                    && SharedResourceContainersMonitorImpl.this.isProcessTreeOverLimit(containerId.toString(), vmemUsed, j, vmemCap)) {
                killWhenVmemOverLimit(containerId, processTreeInfo);
                return;
            }

            // Short-circuit order matters: the docker probe and the kill/queue step only run
            // once the preceding conditions hold.
            // NOTE(review): the docker probe is applied to every container that reaches it, not
            // only to docker-wrapped ones - confirm this is intended.
            boolean killedForPmem = SharedResourceContainersMonitorImpl.this.isPmemCheckEnabled()
                    && SharedResourceContainersMonitorImpl.this.isProcessTreeOverLimit(containerId.toString(), pmemUsed, j2, pmemCap)
                    && isDockerMemoryOverLimit(containerId, pmemCap)
                    && checkKillWhenPmemOverLimit(containerId, processTreeInfo);
            if (killedForPmem) {
                return;
            }

            if (SharedResourceContainersMonitorImpl.this.isGpuMemCheckEnabled()) {
                checkKillWhenGmemOverLimit(containerId, processTreeInfo, map);
            }
        }

        /**
         * Builds the diagnostic text for a container that ran beyond a memory limit, including the
         * GPU-aware usage summary and a dump of the container's process tree.
         *
         * @param str kind of memory named in the message
         * @param j first usage value passed to {@code formatUsageString}
         * @param j2 limit paired with {@code j}
         * @param j3 second usage value passed to {@code formatUsageString}
         * @param j4 limit paired with {@code j3}
         * @param map per-GPU usage
         * @param map2 per-GPU limits
         * @param str2 PID shown in the message
         * @param containerId container shown in the message
         * @param resourceCalculatorProcessTree process tree whose dump is appended
         * @return the assembled diagnostic message
         */
        private String formatErrorMessage(String str, long j, long j2, long j3, long j4, Map<Integer, Long> map, Map<Integer, Long> map2, String str2, ContainerId containerId, ResourceCalculatorProcessTree resourceCalculatorProcessTree) {
            StringBuilder message = new StringBuilder();
            message.append(String.format(Locale.ROOT, "Container [pid=%s,containerID=%s] is running beyond %s memory limits. ", str2, containerId, str));
            message.append("Current usage: ");
            message.append(formatUsageString(j, j2, j3, j4, map, map2));
            message.append(". Killing container.\nDump of the process-tree for ");
            message.append(containerId);
            message.append(" :\n");
            message.append(resourceCalculatorProcessTree.getProcessTreeDump());
            return message.toString();
        }

        /**
         * Extends the four-value memory usage summary with one "GPU n: used of limit" segment per
         * GPU.
         *
         * <p>The plain summary is returned unchanged when either GPU map is {@code null} or when a
         * GPU in the usage map has no entry in the limit map. GPUs with a negative usage, a missing
         * limit or a negative limit are logged and left out.
         *
         * @param j first usage value of the base summary
         * @param j2 limit paired with {@code j}
         * @param j3 second usage value of the base summary
         * @param j4 limit paired with {@code j3}
         * @param map per-GPU usage
         * @param map2 per-GPU limits
         * @return the usage summary, without a trailing separator
         */
        private String formatUsageString(long j, long j2, long j3, long j4, Map<Integer, Long> map, Map<Integer, Long> map2) {
            StringBuilder usage = new StringBuilder(formatUsageString(j, j2, j3, j4));
            boolean gpuDataUsable = map != null && map2 != null && map2.keySet().containsAll(map.keySet());
            if (!gpuDataUsable) {
                return usage.toString();
            }

            usage.append(";");
            for (Map.Entry<Integer, Long> gpuEntry : map.entrySet()) {
                int gpuIndex = gpuEntry.getKey().intValue();
                long used = gpuEntry.getValue().longValue();
                Long cap = map2.get(Integer.valueOf(gpuIndex));
                boolean valid = used >= 0 && cap != null && cap.longValue() >= 0;
                if (!valid) {
                    SharedResourceContainersMonitorImpl.LOG.warn("Invalid gpu usage: GPU {} {} of {}", gpuIndex, used, cap);
                    continue;
                }
                String usedText = StringUtils.TraditionalBinaryPrefix.long2String(used, "", 1);
                String capText = StringUtils.TraditionalBinaryPrefix.long2String(cap.longValue(), "", 1);
                usage.append(String.format(Locale.ROOT, " GPU %d: %sB of %sB memory used;", gpuIndex, usedText, capText));
            }
            // The buffer always ends in ';' here: either the separator or the last GPU segment.
            usage.setLength(usage.length() - 1);
            return usage.toString();
        }

        /**
         * Reads the memory usage of the docker container named after the YARN container from the
         * docker memory cgroup.
         *
         * <p>The docker container id is resolved with {@code docker inspect}, then
         * {@code memory.usage_in_bytes} is read with {@code cat}. Command output is trimmed before
         * use so that a trailing newline breaks neither the cgroup path nor the number parsing.
         *
         * @param containerId YARN container whose id is used as the docker container name
         * @return the value of {@code memory.usage_in_bytes}, or {@code 0} when the docker id or
         *         the usage value is unavailable or not a valid number
         */
        private long getDockerContainerRssMem(ContainerId containerId) {
            String dockerId = ShellCommandUtil.runSystemCmd(new String[]{"docker", "inspect", "--format", "{{.Id}}", containerId.toString()});
            dockerId = dockerId == null ? "" : dockerId.trim();
            if (dockerId.isEmpty()) {
                SharedResourceContainersMonitorImpl.LOG.warn("docker container Id is empty");
                return 0L;
            }
            SharedResourceContainersMonitorImpl.LOG.debug("dockerContainerId = {}", dockerId);

            String rawUsage = ShellCommandUtil.runSystemCmd(new String[]{"cat", "/sys/fs/cgroup/memory/docker/" + dockerId + "/memory.usage_in_bytes"});
            rawUsage = rawUsage == null ? "" : rawUsage.trim();
            if (rawUsage.isEmpty()) {
                SharedResourceContainersMonitorImpl.LOG.warn("container's memory.usage_in_bytes is empty");
                return 0L;
            }
            try {
                return Long.parseLong(rawUsage);
            } catch (NumberFormatException e) {
                // Treat unparsable output like missing output instead of letting the exception
                // escape into the monitoring loop.
                SharedResourceContainersMonitorImpl.LOG.warn("Ignoring unparsable memory.usage_in_bytes value '{}' for container {}", rawUsage, containerId);
                return 0L;
            }
        }

        /**
         * Tells whether the docker container's memory usage, as read by
         * {@code getDockerContainerRssMem}, is strictly above the given limit.
         *
         * @param containerId YARN container whose docker container is probed
         * @param j physical memory limit to compare against
         * @return {@code true} when the reported usage is greater than {@code j}
         */
        private boolean isDockerMemoryOverLimit(ContainerId containerId, long j) {
            SharedResourceContainersMonitorImpl.LOG.debug("isDockerMemoryOverLimit start, containerName = {} pmemLimit = {}", containerId, j);
            final long dockerUsage = getDockerContainerRssMem(containerId);
            SharedResourceContainersMonitorImpl.LOG.debug("pMemForDocker: {} pmemLimit: {}", dockerUsage, j);

            final boolean overLimit = dockerUsage > j;
            if (overLimit) {
                SharedResourceContainersMonitorImpl.LOG.warn("docker container: {} current memory: {} over limit: {}", containerId, dockerUsage, j);
            }
            return overLimit;
        }

        /**
         * Asks docker for the PID of the root process of the docker container named after the
         * YARN container.
         *
         * <p>Blank output and the PID {@code 0}, which docker reports for a container that is not
         * running, are returned as {@code null}. The caller treats {@code null} as "not available
         * yet" and asks again on a later pass, rather than storing an unusable PID.
         *
         * @param containerId YARN container whose id is used as the docker container name
         * @return the trimmed root PID, or {@code null} when no usable PID is available
         */
        private String getDockerRootPID(ContainerId containerId) {
            String output = ShellCommandUtil.runSystemCmd(new String[]{"docker", "inspect", "--format", "{{ .State.Pid }}", containerId.toString()});
            if (output == null) {
                return null;
            }
            String pid = output.trim();
            if (pid.isEmpty() || "0".equals(pid)) {
                return null;
            }
            return pid;
        }

        /**
         * Refreshes and returns the process tree of the docker container that backs a YARN
         * container.
         *
         * <p>On first use the docker root PID is resolved, and a process tree is created and stored
         * on the tracking record. Later calls reuse the stored tree and only refresh it.
         *
         * @param containerId container whose docker process tree is wanted
         * @param map GPU memory statistics keyed by GPU index, used to derive the GPU limits
         * @return the refreshed process tree, or {@code null} when the container is not tracked as
         *         a docker container, its root PID is not known yet, or no process tree is
         *         available for it
         */
        private ResourceCalculatorProcessTree monitorDocker(ContainerId containerId, Map<Integer, GpuMemoryStat> map) {
            ContainersMonitorImpl.ProcessTreeInfo dockerInfo = (ContainersMonitorImpl.ProcessTreeInfo) SharedResourceContainersMonitorImpl.this.trackingDockerContainers.get(containerId);
            if (dockerInfo == null) {
                return null;
            }

            String pid = dockerInfo.getPID();
            if (pid == null) {
                pid = getDockerRootPID(containerId);
                if (pid == null) {
                    return null;
                }
                SharedResourceContainersMonitorImpl.LOG.info("Tracking docker ProcessTree {} for the first time, ContainerID={}", pid, containerId);
                ResourceCalculatorProcessTree created = ResourceCalculatorProcessTree.getResourceCalculatorProcessTree(pid, SharedResourceContainersMonitorImpl.this.processTreeClass, SharedResourceContainersMonitorImpl.this.conf);
                dockerInfo.setPid(pid);
                dockerInfo.setProcessTree(created);
            }

            ResourceCalculatorProcessTree processTree = dockerInfo.getProcessTree();
            SharedResourceContainersMonitorImpl.LOG.debug("Current ProcessTree for docker container of container {} : {} -> {}", containerId, pid, processTree != null ? processTree : "[]");
            SharedResourceContainersMonitorImpl.LOG.debug("Constructing ProcessTree for : PID = {} of docker container with ContainerID={}", pid, containerId);
            if (processTree == null) {
                // The log line above already anticipates a missing tree; refreshing it would
                // throw, so report "not available" like the other early exits.
                SharedResourceContainersMonitorImpl.LOG.debug("No ProcessTree available for docker container with ContainerID={}", containerId);
                return null;
            }
            processTree.updateProcessTree();

            // getGpuLimit() also refreshes the limit cached on the tracking record, so it is
            // evaluated regardless of the log level.
            Map<Integer, Long> gpuLimit = ((SharedResourceProcessTreeInfo) dockerInfo).getGpuLimit(map);
            if (SharedResourceContainersMonitorImpl.LOG.isDebugEnabled()) {
                // No per-GPU usage is collected here, so an empty usage map is reported.
                Map<Integer, Long> noGpuUsage = new HashMap<Integer, Long>();
                String usageSummary = formatUsageString(processTree.getVirtualMemorySize(), dockerInfo.getVmemLimit(), processTree.getRssMemorySize(), dockerInfo.getPmemLimit(), noGpuUsage, gpuLimit);
                SharedResourceContainersMonitorImpl.LOG.debug(String.format(Locale.ROOT, "Memory usage of ProcessTree %s for docker with container-id %s: ", pid, containerId.toString()) + usageSummary);
            }
            return processTree;
        }
    }

    /* loaded from: input_file:org/apache/hadoop/yarn/server/nodemanager/containermanager/sharedresourcemonitor/SharedResourceContainersMonitorImpl$SharedResourceProcessTreeInfo.class */
    public static class SharedResourceProcessTreeInfo extends ContainersMonitorImpl.ProcessTreeInfo {
        long pmemOverallCurrentUsage;
        long vmemOverallCurrentUsage;
        Map<Integer, Long> gpuOverallCurrentUsage;
        Map<Integer, Long> gpuLimit;
        private Map<Integer, Float> gpuShareRatio;
        private Map<String, String> containerEnv;

        SharedResourceProcessTreeInfo(ContainerId containerId, String str, ResourceCalculatorProcessTree resourceCalculatorProcessTree, long j, long j2, int i, Map<String, String> map) {
            super(containerId, str, resourceCalculatorProcessTree, j, j2, i);
            this.gpuLimit = null;
            this.gpuShareRatio = null;
            this.containerEnv = map;
        }

        public long getPmemOverallCurrentUsage() {
            return this.pmemOverallCurrentUsage;
        }

        public long getVmemOverallCurrentUsage() {
            return this.vmemOverallCurrentUsage;
        }

        public Map<Integer, Long> getGpuOverallCurrentUsage() {
            return this.gpuOverallCurrentUsage;
        }

        Map<Integer, Float> getGpuShareRatio() {
            if (this.gpuShareRatio != null) {
                return this.gpuShareRatio;
            }
            this.gpuShareRatio = getGpuShare(this.containerEnv);
            return this.gpuShareRatio;
        }

        void updateResourceUsage(long j, long j2, Map<Integer, Long> map) {
            this.pmemOverallCurrentUsage = j;
            this.vmemOverallCurrentUsage = j2;
            this.gpuOverallCurrentUsage = map;
        }

        private Map<Integer, Float> getGpuShare(Map<String, String> map) {
            String str;
            SharedResourceContainersMonitorImpl.LOG.debug("Get container env: {}", map);
            if (!this.containerEnv.containsKey(SharedResourceConstants.DTF_RESOURCE_TYPE) || !this.containerEnv.containsKey(SharedResourceConstants.GPU_ALLOCATION_RATIO) || (str = this.containerEnv.get(SharedResourceConstants.GPU_ALLOCATION_RATIO)) == null || str.isEmpty()) {
                return null;
            }
            HashMap hashMap = new HashMap();
            for (String str2 : str.split(",")) {
                try {
                    String[] split = str2.split(":");
                    hashMap.put(Integer.valueOf(Integer.parseInt(split[0])), Float.valueOf(Float.parseFloat(split[1])));
                } catch (Exception e) {
                    SharedResourceContainersMonitorImpl.LOG.warn("get gpu info from hint error, hint:{}", map);
                }
            }
            SharedResourceContainersMonitorImpl.LOG.info("GPU share {} -> {}", str, hashMap);
            return hashMap;
        }

        Map<Integer, Long> getGpuLimit(Map<Integer, GpuMemoryStat> map) {
            HashMap hashMap = new HashMap();
            Map<Integer, Float> gpuShareRatio = getGpuShareRatio();
            if (gpuShareRatio == null || map.isEmpty()) {
                this.gpuLimit = hashMap;
                return this.gpuLimit;
            }
            for (Map.Entry<Integer, Float> entry : gpuShareRatio.entrySet()) {
                int intValue = entry.getKey().intValue();
                float floatValue = entry.getValue().floatValue();
                GpuMemoryStat gpuMemoryStat = map.get(Integer.valueOf(intValue));
                if (gpuMemoryStat == null) {
                    SharedResourceContainersMonitorImpl.LOG.warn("GPU {} not in list of available GPUs {}", Integer.valueOf(intValue), map.keySet());
                } else {
                    hashMap.put(Integer.valueOf(intValue), Long.valueOf(floatValue * ((float) gpuMemoryStat.getTotalMem().longValue())));
                    SharedResourceContainersMonitorImpl.LOG.debug("gpu index: {}, shareRatio: {}, totalMem: {}", new Object[]{Integer.valueOf(intValue), Float.valueOf(floatValue), gpuMemoryStat.getTotalMem()});
                }
            }
            this.gpuLimit = hashMap;
            return this.gpuLimit;
        }
    }

    /**
     * Creates the monitor with empty tracking state, a zero usable-memory threshold and a
     * shared-resource monitoring thread.
     *
     * @param containerExecutor executor passed to the base monitor
     * @param asyncDispatcher dispatcher passed to the base monitor
     * @param context NodeManager context passed to the base monitor
     */
    public SharedResourceContainersMonitorImpl(ContainerExecutor containerExecutor, AsyncDispatcher asyncDispatcher, Context context) {
        super(containerExecutor, asyncDispatcher, context);

        // Containers that run inside docker.
        this.trackingDockerContainers = new ConcurrentHashMap<>();

        // Physical-memory threshold state; zero threshold until serviceInit configures one.
        this.usableMemoryThreshold = 0L;
        this.pmemOverLimitedContainers = new HashMap<>();

        // Per-GPU threshold state, filled in by serviceInit.
        this.usableGpuMemoryThreshold = new HashMap<>();
        this.gmemOverLimitedContainers = new HashMap<>();
        this.gmemStillInUsage = new HashMap<>();

        // Created last, after the state above is in place.
        this.monitoringThread = new SharedResourceMonitoringThread();
    }

    /**
     * Initializes the monitor: reads the GPU memory check switch, selects the process-tree
     * implementation, and obtains the node metrics and GPU API singletons. When a dynamic memory
     * usage threshold is configured it also derives the usable-memory thresholds.
     *
     * <p>The threshold only takes effect when it is greater than the default value and lower than
     * {@code 1.0}. Otherwise the usable-memory thresholds stay unset and the per-GPU bookkeeping
     * maps stay empty.
     *
     * <p>Thresholds are computed in {@code double} and truncated to {@code long}. Single
     * precision cannot represent typical byte counts exactly.
     *
     * @param configuration service configuration
     * @throws Exception if the base initialization fails
     */
    protected void serviceInit(Configuration configuration) throws Exception {
        super.serviceInit(configuration);
        this.gpuMemCheckEnabled = configuration.getBoolean(NM_GPUS_MEM_CHECK_ENABLED, false);
        this.processTreeClass = ProcfsBasedProcessTreeWithSharedResource.class;
        this.nodeMetrics = NodeManagerMetricsForSharedResource.getInstance();
        this.gpuAPI = SharedGpuAPI.getInstance();

        long pmemAllocatedForContainers = getPmemAllocatedForContainers();
        float thresholdRatio = this.conf.getFloat(NM_CONTAINER_DYNA_MEM_USAGE_THRESHOLD, DEFAULT_NM_CONTAINER_DYNA_MEM_USAGE_THRESHOLD);
        if (thresholdRatio <= DEFAULT_NM_CONTAINER_DYNA_MEM_USAGE_THRESHOLD || thresholdRatio >= 1.0f) {
            return;
        }

        this.usableMemoryThreshold = (long) (thresholdRatio * (double) pmemAllocatedForContainers);
        LOG.info("Memory threshold: {}, all memory: {}, usable memory: {}", thresholdRatio, pmemAllocatedForContainers, this.usableMemoryThreshold);

        for (Map.Entry<Integer, GpuMemoryStat> gpu : this.gpuAPI.getGpuMemoryStat().entrySet()) {
            Integer gpuIndex = gpu.getKey();
            long totalMem = gpu.getValue().getTotalMem().longValue();
            long usableGpuMem = (long) (thresholdRatio * (double) totalMem);
            this.usableGpuMemoryThreshold.put(gpuIndex, Long.valueOf(usableGpuMem));
            // Every GPU known at init time gets its own over-limit queue and usage counter.
            this.gmemOverLimitedContainers.put(gpuIndex, new HashMap<>());
            this.gmemStillInUsage.put(gpuIndex, 0L);
            LOG.info("Gpu {} memory threshold: {}, all memory: {}, usable memory: {}", gpuIndex, thresholdRatio, totalMem, usableGpuMem);
        }
    }

    /**
     * Starts the monitoring thread when container monitoring is enabled; otherwise does nothing.
     *
     * @throws Exception declared by the service contract
     */
    protected void serviceStart() throws Exception {
        if (!this.containersMonitorEnabled) {
            return;
        }
        this.monitoringThread.start();
    }

    /**
     * Stops the monitoring thread when container monitoring is enabled: raises the stop flag,
     * interrupts the thread and waits for it to finish.
     *
     * <p>If the waiting thread is itself interrupted, the wait is abandoned and the interrupt
     * status is restored so that callers further up the stack can still observe it.
     *
     * @throws Exception declared by the service contract
     */
    protected void serviceStop() throws Exception {
        if (!this.containersMonitorEnabled) {
            return;
        }
        this.stopped = true;
        this.monitoringThread.interrupt();
        try {
            this.monitoringThread.join();
        } catch (InterruptedException e) {
            LOG.warn("SharedResourceContainersMonitorImpl monitoring thread interrupted");
            // Catching InterruptedException clears the flag; set it again for the caller.
            Thread.currentThread().interrupt();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Finds the first GPU, in the iteration order of the limit map, whose usage is over its limit
     * according to the scalar over-limit check.
     *
     * @param str container id used in log output and passed to the scalar check
     * @param map current per-GPU usage
     * @param map2 second per-GPU usage map passed to the scalar check (presumably the usage of
     *             aged processes - confirm against the caller)
     * @param map3 per-GPU limits
     * @return the index of the first GPU found over limit, or {@code -1} when there is none or
     *         when either usage map is null or empty or the limit map is {@code null}
     */
    public int isProcessTreeOverLimit(String str, Map<Integer, Long> map, Map<Integer, Long> map2, Map<Integer, Long> map3) {
        boolean nothingToCompare = MapUtils.isEmpty(map) || MapUtils.isEmpty(map2) || map3 == null;
        if (nothingToCompare) {
            return -1;
        }
        for (Map.Entry<Integer, Long> limitEntry : map3.entrySet()) {
            Integer gpuIndex = limitEntry.getKey();
            Long limit = limitEntry.getValue();
            Long firstUsage = map.get(gpuIndex);
            Long secondUsage = map2.get(gpuIndex);
            if (firstUsage == null || secondUsage == null) {
                LOG.warn("Unable to get current GPU {} usage for container {}", gpuIndex, str);
                continue;
            }
            if (isProcessTreeOverLimit(str, firstUsage.longValue(), secondUsage.longValue(), limit.longValue())) {
                return gpuIndex.intValue();
            }
        }
        return -1;
    }

    /**
     * Updates the per-container metrics source in response to a monitoring event. Does nothing
     * when container metrics are disabled or the event is {@code null}.
     *
     * <p>The event kind is resolved through the compiler-generated switch map; the comments on
     * the branches infer the kind from the event class each branch casts to.
     *
     * @param containersMonitorEvent monitoring event to reflect in the metrics
     */
    protected void updateContainerMetrics(ContainersMonitorEvent containersMonitorEvent) {
        if (containersMonitorEvent == null || !this.containerMetricsEnabled) {
            return;
        }
        ContainerId containerId = containersMonitorEvent.getContainerId();
        int eventKind = AnonymousClass1.$SwitchMap$org$apache$hadoop$yarn$server$nodemanager$containermanager$monitor$ContainersMonitorEventType[containersMonitorEvent.getType().ordinal()];

        if (eventKind == 1) {
            // Start-monitoring event: record state-change durations and the initial limits.
            ContainerMetricsWithSharedResource metrics = ContainerMetricsWithSharedResource.forContainer(containerId, this.containerMetricsPeriodMs, this.containerMetricsUnregisterDelayMs);
            ContainerStartMonitoringEvent startEvent = (ContainerStartMonitoringEvent) containersMonitorEvent;
            metrics.recordStateChangeDurations(startEvent.getLaunchDuration(), startEvent.getLocalizationDuration());
            int vmemShifted = (int) (startEvent.getVmemLimit() >> 20);
            int pmemShifted = (int) (startEvent.getPmemLimit() >> 20);
            metrics.recordResourceLimit(vmemShifted, pmemShifted, startEvent.getCpuVcores());
        } else if (eventKind == 2) {
            // Presumably the stop-monitoring event: finish this source, then let the base
            // class update its own metrics.
            ContainerMetricsWithSharedResource.forContainer(containerId, this.containerMetricsPeriodMs, this.containerMetricsUnregisterDelayMs).finished();
            super.updateContainerMetrics(containersMonitorEvent);
        } else if (eventKind == 3) {
            // Resource-change event: record the new limits derived from the new resource.
            ContainerMetricsWithSharedResource metrics = ContainerMetricsWithSharedResource.forContainer(containerId, this.containerMetricsPeriodMs, this.containerMetricsUnregisterDelayMs);
            Resource newResource = ((ChangeMonitoringContainerResourceEvent) containersMonitorEvent).getResource();
            int pmemLimit = (int) newResource.getMemorySize();
            metrics.recordResourceLimit((int) (pmemLimit * vmemRatio), pmemLimit, newResource.getVirtualCores());
        }
    }

    /**
     * Tells whether the GPU memory limit check is enabled.
     *
     * @return the flag read from the configuration during {@code serviceInit}
     */
    public boolean isGpuMemCheckEnabled() {
        return this.gpuMemCheckEnabled;
    }

    /**
     * Stops monitoring a container: lets the base class handle the event first, then drops the
     * container from the docker tracking map.
     *
     * @param containersMonitorEvent the stop event, forwarded to the base class
     * @param containerId container that is no longer monitored
     */
    protected void onStopMonitoringContainer(ContainersMonitorEvent containersMonitorEvent, ContainerId containerId) {
        super.onStopMonitoringContainer(containersMonitorEvent, containerId);
        // Removing an id that was never tracked as a docker container is harmless.
        this.trackingDockerContainers.remove(containerId);
    }

    /**
     * Starts monitoring a container: updates its metrics and registers a tracking record built
     * from the event's limits and the container's launch environment. Containers that declare a
     * docker image get a second, independent record in the docker tracking map.
     *
     * @param containersMonitorEvent the start event; must be a {@link ContainerStartMonitoringEvent}
     * @param containerId container to monitor
     */
    protected void onStartMonitoringContainer(ContainersMonitorEvent containersMonitorEvent, ContainerId containerId) {
        ContainerStartMonitoringEvent startEvent = (ContainerStartMonitoringEvent) containersMonitorEvent;
        LOG.info("Starting resource-monitoring for {}", containerId);
        updateContainerMetrics(containersMonitorEvent);

        Container container = this.context.getContainers().get(containerId);
        Map<String, String> launchEnv = container.getLaunchContext().getEnvironment();
        long vmemLimit = startEvent.getVmemLimit();
        long pmemLimit = startEvent.getPmemLimit();
        int cpuVcores = startEvent.getCpuVcores();

        SharedResourceProcessTreeInfo containerRecord = new SharedResourceProcessTreeInfo(containerId, null, null, vmemLimit, pmemLimit, cpuVcores, launchEnv);
        this.trackingContainers.put(containerId, containerRecord);

        if (isDockerWrapper(containerId)) {
            // The docker view gets its own record because its PID and process tree differ.
            SharedResourceProcessTreeInfo dockerRecord = new SharedResourceProcessTreeInfo(containerId, null, null, vmemLimit, pmemLimit, cpuVcores, launchEnv);
            this.trackingDockerContainers.put(containerId, dockerRecord);
        }
    }

    /**
     * Tells whether the container's launch environment names a docker image.
     *
     * @param containerId container to inspect; must be present in the NodeManager context
     * @return {@code true} when the docker image variable is set to a non-empty value
     */
    private boolean isDockerWrapper(ContainerId containerId) {
        Container container = this.context.getContainers().get(containerId);
        String dockerImage = container.getLaunchContext().getEnvironment().get(SharedResourceConstants.BATCH_DOCKER_IMAGE);
        LOG.info("Docker Image for container {} : {}", containerId, dockerImage);
        return dockerImage != null && !dockerImage.isEmpty();
    }

    /**
     * Sums the per-GPU limits.
     *
     * @param map per-GPU limits; may be {@code null}
     * @return the sum of all values, or {@code 0} for a {@code null} or empty map
     */
    private long getAggregatedGpuLimit(Map<Integer, Long> map) {
        if (map == null) {
            return 0L;
        }
        long total = 0L;
        for (Long perGpuLimit : map.values()) {
            total += perGpuLimit.longValue();
        }
        return total;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Records the descriptive tags of a container on its metrics source: aggregated GPU memory
     * limit, application id and, when available, the task name, task index, allocated GPUs and
     * GPU model names. Does nothing when container metrics are disabled.
     *
     * <p>The environment-derived tags are skipped when the container is no longer present in the
     * NodeManager context. They are also skipped for a container whose id ends in {@code "0001"}
     * and that carries no sub-task name (presumably the application master - confirm).
     * Environment entries whose value is {@code null} are ignored.
     *
     * @param processTreeInfo tracking record; must be a {@link SharedResourceProcessTreeInfo}
     * @param containerId container whose tags are recorded
     */
    public void recordMetricsTags(ContainersMonitorImpl.ProcessTreeInfo processTreeInfo, ContainerId containerId) {
        if (!this.containerMetricsEnabled) {
            return;
        }
        ContainerMetricsWithSharedResource metrics = ContainerMetricsWithSharedResource.forContainer(containerId, this.containerMetricsPeriodMs, this.containerMetricsUnregisterDelayMs);
        Map<Integer, Long> gpuLimits = ((SharedResourceProcessTreeInfo) processTreeInfo).getGpuLimit(this.gpuAPI.getGpuMemoryStat());
        metrics.recordGpuMemLimit((int) (getAggregatedGpuLimit(gpuLimits) >> 20));
        metrics.recordApplicationId(containerId.getApplicationAttemptId().getApplicationId().toString());

        Container container = this.context.getContainers().get(containerId);
        if (container == null) {
            // The container can leave the context while it is still being sampled.
            LOG.warn("Container {} is not present in the NodeManager context; skipping task tags", containerId);
            return;
        }
        Map<String, String> environment = container.getLaunchContext().getEnvironment();
        LOG.debug("env:{}", environment);

        boolean hasTaskName = environment.containsKey(SharedResourceConstants.BATCH_SUBTASK_NAME);
        if (!hasTaskName && containerId.toString().endsWith("0001")) {
            return;
        }

        String taskName = environment.get(SharedResourceConstants.BATCH_SUBTASK_NAME);
        if (taskName != null) {
            metrics.recordTaskName(taskName);
        }
        String taskIndex = environment.get(SharedResourceConstants.BATCH_SUBTASK_INDEX);
        if (taskIndex != null) {
            metrics.recordTaskIndex(taskIndex);
        }
        String allocatedGpus = environment.get(SharedResourceConstants.YARN_GPU_DEVICES);
        if (allocatedGpus != null) {
            metrics.recordAllocatedGpus(allocatedGpus);
        }
        for (int i : this.gpuAPI.getDeviceIndexList()) {
            metrics.recordGpuModel(i, this.gpuAPI.getModelName(i));
        }
    }
}
